Fossil SCM

Enhance the diff logic so that it can suppress differences that do not match a regular expression. This capability is so far only exposed on the test-diff command.

drh 2013-01-02 00:58 trunk

Commit 97a7b924c0e101782831fe7a011ad30a8380e800

Parent d74698ec28794dc…

8 files changed +128 -35 +2 -2 +3 -3 +1 -1 +1 -1 +2 -2 +87 -64 +1 -1

~ src/diff.c ~ src/diffcmd.c ~ src/info.c ~ src/json_diff.c ~ src/json_wiki.c ~ src/merge3.c ~ src/regexp.c ~ src/wiki.c

M src/diff.c

+128 -35

		--- src/diff.c
		+++ src/diff.c
		@@ -412,24 +412,45 @@
412	412	** Return true if two DLine elements are identical.
413	413	*/
414	414	static int same_dline(DLine *pA, DLine *pB){
415	415	return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416	416	}
	417	+
	418	+/*
	419	+** Return true if the regular expression *pRe matches any of the
	420	+** N dlines
	421	+*/
	422	+static int re_dline_match(
	423	+ ReCompiled *pRe, /* The regular expression to be matched */
	424	+ DLine *aDLine, /* First of N DLines to compare against */
	425	+ int N /* Number of DLines to check */
	426	+){
	427	+ while( N-- ){
	428	+ if( re_exec(pRe, aDLine->z, LENGTH(aDLine)) ){
	429	+ return 1;
	430	+ }
	431	+ aDLine++;
	432	+ }
	433	+ return 0;
	434	+}
417	435
418	436	/*
419	437	** Append a single line of context-diff output to pOut.
420	438	*/
421	439	static void appendDiffLine(
422	440	Blob *pOut, /* Where to write the line of output */
423	441	char cPrefix, /* One of " ", "+", or "-" */
424	442	DLine *pLine, /* The line to be output */
425		- int html /* True if generating HTML. False for plain text */
	443	+ int html, /* True if generating HTML. False for plain text */
	444	+ ReCompiled *pRe /* Colorize only if line matches this Regex */
426	445	){
427	446	blob_append(pOut, &cPrefix, 1);
428	447	if( html ){
429	448	char *zHtml;
430		- if( cPrefix=='+' ){
	449	+ if( pRe && re_dline_match(pRe, pLine, 1)==0 ){
	450	+ cPrefix = ' ';
	451	+ }else if( cPrefix=='+' ){
431	452	blob_append(pOut, "<span class=\"diffadd\">", -1);
432	453	}else if( cPrefix=='-' ){
433	454	blob_append(pOut, "<span class=\"diffrm\">", -1);
434	455	}
435	456	zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
		@@ -463,21 +484,19 @@
463	484	blob_append(pOut, "        ", 8);
464	485	}
465	486	if( html ) blob_append(pOut, "</span>", -1);
466	487	}
467	488
468		-
469	489	/*
470	490	** Given a raw diff p[] in which the p->aEdit[] array has been filled
471	491	** in, compute a context diff into pOut.
472	492	*/
473	493	static void contextDiff(
474	494	DContext *p, /* The difference */
475	495	Blob *pOut, /* Output a context diff to here */
476		- int nContext, /* Number of lines of context */
477		- int showLn, /* Show line numbers */
478		- int html /* Render as HTML */
	496	+ ReCompiled *pRe, /* Only show changes that match this regex */
	497	+ u64 diffFlags /* Flags controlling the diff format */
479	498	){
480	499	DLine *A; /* Left side of the diff */
481	500	DLine *B; /* Right side of the diff */
482	501	int a = 0; /* Index of next line in A[] */
483	502	int b = 0; /* Index of next line in B[] */
		@@ -488,11 +507,18 @@
488	507	int na, nb; /* Number of lines shown from A and B */
489	508	int i, j; /* Loop counters */
490	509	int m; /* Number of lines to output */
491	510	int skip; /* Number of lines to skip */
492	511	int nChunk = 0; /* Number of diff chunks seen so far */
	512	+ int nContext; /* Number of lines of context */
	513	+ int showLn; /* Show line numbers */
	514	+ int html; /* Render as HTML */
	515	+ int showDivider = 0; /* True to show the divider between diff blocks */
493	516
	517	+ nContext = diff_context_lines(diffFlags);
	518	+ showLn = (diffFlags & DIFF_LINENO)!=0;
	519	+ html = (diffFlags & DIFF_HTML)!=0;
494	520	A = p->aFrom;
495	521	B = p->aTo;
496	522	R = p->aEdit;
497	523	mxr = p->nEdit;
498	524	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
		@@ -499,10 +525,35 @@
499	525	for(r=0; r<mxr; r += 3*nr){
500	526	/* Figure out how many triples to show in a single block */
501	527	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
502	528	/* printf("r=%d nr=%d\n", r, nr); */
503	529
	530	+ /* If there is a regex, skip this block (generate no diff output)
	531	+ ** if the regex matches or does not match both insert and delete.
	532	+ ** Only display the block if one side matches but the other side does
	533	+ ** not.
	534	+ */
	535	+ if( pRe ){
	536	+ int hideBlock = 1;
	537	+ int xa = a, xb = b;
	538	+ for(i=0; hideBlock && i<nr; i++){
	539	+ int c1, c2;
	540	+ xa += R[r+i*3];
	541	+ xb += R[r+i*3];
	542	+ c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
	543	+ c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
	544	+ hideBlock = c1==c2;
	545	+ xa += R[r+i*3+1];
	546	+ xb += R[r+i*3+2];
	547	+ }
	548	+ if( hideBlock ){
	549	+ a = xa;
	550	+ b = xb;
	551	+ continue;
	552	+ }
	553	+ }
	554	+
504	555	/* For the current block comprising nr triples, figure out
505	556	** how many lines of A and B are to be displayed
506	557	*/
507	558	if( R[r]>nContext ){
508	559	na = nb = nContext;
		@@ -531,12 +582,13 @@
531	582	** context diff that contains line numbers, show the separator from
532	583	** the previous block.
533	584	*/
534	585	nChunk++;
535	586	if( showLn ){
536		- if( r==0 ){
	587	+ if( !showDivider ){
537	588	/* Do not show a top divider */
	589	+ showDivider = 1;
538	590	}else if( html ){
539	591	blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
540	592	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
541	593	}else{
542	594	blob_appendf(pOut, "%.80c\n", '.');
		@@ -559,34 +611,36 @@
559	611	a += skip;
560	612	b += skip;
561	613	m = R[r] - skip;
562	614	for(j=0; j<m; j++){
563	615	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
564		- appendDiffLine(pOut, ' ', &A[a+j], html);
	616	+ appendDiffLine(pOut, ' ', &A[a+j], html, 0);
565	617	}
566	618	a += m;
567	619	b += m;
568	620
569	621	/* Show the differences */
570	622	for(i=0; i<nr; i++){
571	623	m = R[r+i*3+1];
572	624	for(j=0; j<m; j++){
	625	+ char cMark = '-';
573	626	if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
574		- appendDiffLine(pOut, '-', &A[a+j], html);
	627	+ if( pRe && re_dline_match(pRe, &A[a+j], 1)==0 ) cMark = ' ';
	628	+ appendDiffLine(pOut, '-', &A[a+j], html, pRe);
575	629	}
576	630	a += m;
577	631	m = R[r+i*3+2];
578	632	for(j=0; j<m; j++){
579	633	if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
580		- appendDiffLine(pOut, '+', &B[b+j], html);
	634	+ appendDiffLine(pOut, '+', &B[b+j], html, pRe);
581	635	}
582	636	b += m;
583	637	if( i<nr-1 ){
584	638	m = R[r+i*3+3];
585	639	for(j=0; j<m; j++){
586	640	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
587		- appendDiffLine(pOut, ' ', &B[b+j], html);
	641	+ appendDiffLine(pOut, ' ', &B[b+j], html, 0);
588	642	}
589	643	b += m;
590	644	a += m;
591	645	}
592	646	}
		@@ -595,11 +649,11 @@
595	649	assert( nr==i );
596	650	m = R[r+nr*3];
597	651	if( m>nContext ) m = nContext;
598	652	for(j=0; j<m; j++){
599	653	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
600		- appendDiffLine(pOut, ' ', &B[b+j], html);
	654	+ appendDiffLine(pOut, ' ', &B[b+j], html, 0);
601	655	}
602	656	}
603	657	}
604	658
605	659	/*
		@@ -615,10 +669,11 @@
615	669	const char *zStart; /* A <span> tag */
616	670	int iEnd; /* Write </span> prior to character iEnd */
617	671	int iStart2; /* Write zStart2 prior to character iStart2 */
618	672	const char *zStart2; /* A <span> tag */
619	673	int iEnd2; /* Write </span> prior to character iEnd2 */
	674	+ ReCompiled *pRe; /* Only colorize matching lines, if not NULL */
620	675	};
621	676
622	677	/*
623	678	** Flags for sbsWriteText()
624	679	*/
		@@ -640,13 +695,17 @@
640	695	int k; /* Cursor position */
641	696	int needEndSpan = 0;
642	697	const char *zIn = pLine->z;
643	698	char *z = &p->zLine[p->n];
644	699	int w = p->width;
	700	+ int colorize = p->escHtml;
	701	+ if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){
	702	+ colorize = 0;
	703	+ }
645	704	for(i=j=k=0; k<w && i<n; i++, k++){
646	705	char c = zIn[i];
647		- if( p->escHtml ){
	706	+ if( colorize ){
648	707	if( i==p->iStart ){
649	708	int x = strlen(p->zStart);
650	709	memcpy(z+j, p->zStart, x);
651	710	j += x;
652	711	needEndSpan = 1;
		@@ -1196,13 +1255,12 @@
1196	1255	** in, compute a side-by-side diff into pOut.
1197	1256	*/
1198	1257	static void sbsDiff(
1199	1258	DContext *p, /* The computed diff */
1200	1259	Blob *pOut, /* Write the results here */
1201		- int nContext, /* Number of lines of context around each change */
1202		- int width, /* Width of each column of output */
1203		- int escHtml /* True to generate HTML output */
	1260	+ ReCompiled *pRe, /* Only show changes that match this regex */
	1261	+ u64 diffFlags /* Flags controlling the diff */
1204	1262	){
1205	1263	DLine *A; /* Left side of the diff */
1206	1264	DLine *B; /* Right side of the diff */
1207	1265	int a = 0; /* Index of next line in A[] */
1208	1266	int b = 0; /* Index of next line in B[] */
		@@ -1214,16 +1272,20 @@
1214	1272	int i, j; /* Loop counters */
1215	1273	int m, ma, mb;/* Number of lines to output */
1216	1274	int skip; /* Number of lines to skip */
1217	1275	int nChunk = 0; /* Number of chunks of diff output seen so far */
1218	1276	SbsLine s; /* Output line buffer */
	1277	+ int nContext; /* Lines of context above and below each change */
	1278	+ int showDivider = 0; /* True to show the divider */
1219	1279
1220	1280	memset(&s, 0, sizeof(s));
1221		- s.zLine = fossil_malloc( 15*width + 200 );
	1281	+ s.width = diff_width(diffFlags);
	1282	+ s.zLine = fossil_malloc( 15*s.width + 200 );
1222	1283	if( s.zLine==0 ) return;
1223		- s.width = width;
1224		- s.escHtml = escHtml;
	1284	+ nContext = diff_context_lines(diffFlags);
	1285	+ s.escHtml = (diffFlags & DIFF_HTML)!=0;
	1286	+ s.pRe = pRe;
1225	1287	s.iStart = -1;
1226	1288	s.iStart2 = 0;
1227	1289	s.iEnd = -1;
1228	1290	A = p->aFrom;
1229	1291	B = p->aTo;
		@@ -1232,10 +1294,35 @@
1232	1294	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
1233	1295	for(r=0; r<mxr; r += 3*nr){
1234	1296	/* Figure out how many triples to show in a single block */
1235	1297	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
1236	1298	/* printf("r=%d nr=%d\n", r, nr); */
	1299	+
	1300	+ /* If there is a regex, skip this block (generate no diff output)
	1301	+ ** if the regex matches or does not match both insert and delete.
	1302	+ ** Only display the block if one side matches but the other side does
	1303	+ ** not.
	1304	+ */
	1305	+ if( pRe ){
	1306	+ int hideBlock = 1;
	1307	+ int xa = a, xb = b;
	1308	+ for(i=0; hideBlock && i<nr; i++){
	1309	+ int c1, c2;
	1310	+ xa += R[r+i*3];
	1311	+ xb += R[r+i*3];
	1312	+ c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
	1313	+ c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
	1314	+ hideBlock = c1==c2;
	1315	+ xa += R[r+i*3+1];
	1316	+ xb += R[r+i*3+2];
	1317	+ }
	1318	+ if( hideBlock ){
	1319	+ a = xa;
	1320	+ b = xb;
	1321	+ continue;
	1322	+ }
	1323	+ }
1237	1324
1238	1325	/* For the current block comprising nr triples, figure out
1239	1326	** how many lines of A and B are to be displayed
1240	1327	*/
1241	1328	if( R[r]>nContext ){
		@@ -1260,20 +1347,21 @@
1260	1347	na += R[r+i*3];
1261	1348	nb += R[r+i*3];
1262	1349	}
1263	1350
1264	1351	/* Draw the separator between blocks */
1265		- if( r>0 ){
1266		- if( escHtml ){
	1352	+ if( showDivider ){
	1353	+ if( s.escHtml ){
1267	1354	blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1268		- width*2+16, '.');
	1355	+ s.width*2+16, '.');
1269	1356	}else{
1270		- blob_appendf(pOut, "%.*c\n", width*2+16, '.');
	1357	+ blob_appendf(pOut, "%.*c\n", s.width*2+16, '.');
1271	1358	}
1272	1359	}
	1360	+ showDivider = 1;
1273	1361	nChunk++;
1274		- if( escHtml ){
	1362	+ if( s.escHtml ){
1275	1363	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
1276	1364	}
1277	1365
1278	1366	/* Show the initial common area */
1279	1367	a += skip;
		@@ -1316,11 +1404,11 @@
1316	1404	sbsWriteLineno(&s, a);
1317	1405	s.iStart = 0;
1318	1406	s.zStart = "<span class=\"diffrm\">";
1319	1407	s.iEnd = s.width;
1320	1408	sbsWriteText(&s, &A[a], SBS_PAD);
1321		- if( escHtml ){
	1409	+ if( s.escHtml ){
1322	1410	sbsWrite(&s, " &lt;\n", 6);
1323	1411	}else{
1324	1412	sbsWrite(&s, " <\n", 3);
1325	1413	}
1326	1414	blob_append(pOut, s.zLine, s.n);
		@@ -1338,12 +1426,12 @@
1338	1426	a++;
1339	1427	b++;
1340	1428	}else if( alignment[j]==2 ){
1341	1429	/* Insert one line on the right */
1342	1430	s.n = 0;
1343		- sbsWriteSpace(&s, width + 7);
1344		- if( escHtml ){
	1431	+ sbsWriteSpace(&s, s.width + 7);
	1432	+ if( s.escHtml ){
1345	1433	sbsWrite(&s, " &gt; ", 6);
1346	1434	}else{
1347	1435	sbsWrite(&s, " > ", 3);
1348	1436	}
1349	1437	sbsWriteLineno(&s, b);
		@@ -1833,10 +1921,11 @@
1833	1921	*/
1834	1922	int *text_diff(
1835	1923	Blob *pA_Blob, /* FROM file */
1836	1924	Blob *pB_Blob, /* TO file */
1837	1925	Blob *pOut, /* Write diff here if not NULL */
	1926	+ ReCompiled *pRe, /* Only output changes where this Regexp matches */
1838	1927	u64 diffFlags /* DIFF_* flags defined above */
1839	1928	){
1840	1929	int ignoreEolWs; /* Ignore whitespace at the end of lines */
1841	1930	int nContext; /* Amount of context to display */
1842	1931	DContext c;
		@@ -1844,11 +1933,10 @@
1844	1933	if( diffFlags & DIFF_INVERT ){
1845	1934	Blob *pTemp = pA_Blob;
1846	1935	pA_Blob = pB_Blob;
1847	1936	pB_Blob = pTemp;
1848	1937	}
1849		- nContext = diff_context_lines(diffFlags);
1850	1938	ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
1851	1939
1852	1940	/* Prepare the input files */
1853	1941	memset(&c, 0, sizeof(c));
1854	1942	c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
		@@ -1868,17 +1956,14 @@
1868	1956	diff_all(&c);
1869	1957	if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
1870	1958
1871	1959	if( pOut ){
1872	1960	/* Compute a context or side-by-side diff into pOut */
1873		- int escHtml = (diffFlags & DIFF_HTML)!=0;
1874	1961	if( diffFlags & DIFF_SIDEBYSIDE ){
1875		- int width = diff_width(diffFlags);
1876		- sbsDiff(&c, pOut, nContext, width, escHtml);
	1962	+ sbsDiff(&c, pOut, pRe, diffFlags);
1877	1963	}else{
1878		- int showLn = (diffFlags & DIFF_LINENO)!=0;
1879		- contextDiff(&c, pOut, nContext, showLn, escHtml);
	1964	+ contextDiff(&c, pOut, pRe, diffFlags);
1880	1965	}
1881	1966	fossil_free(c.aFrom);
1882	1967	fossil_free(c.aTo);
1883	1968	fossil_free(c.aEdit);
1884	1969	return 0;
		@@ -1941,11 +2026,11 @@
1941	2026	if( g.argc<4 ) usage("FILE1 FILE2 ...");
1942	2027	blob_read_from_file(&a, g.argv[2]);
1943	2028	for(i=3; i<g.argc; i++){
1944	2029	if( i>3 ) fossil_print("-------------------------------\n");
1945	2030	blob_read_from_file(&b, g.argv[i]);
1946		- R = text_diff(&a, &b, 0, diffFlags);
	2031	+ R = text_diff(&a, &b, 0, 0, diffFlags);
1947	2032	for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
1948	2033	fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
1949	2034	}
1950	2035	/* free(R); */
1951	2036	blob_reset(&b);
		@@ -1960,25 +2045,33 @@
1960	2045	** Print the difference between two files. The usual diff options apply.
1961	2046	*/
1962	2047	void test_diff_cmd(void){
1963	2048	Blob a, b, out;
1964	2049	u64 diffFlag;
	2050	+ const char *zRe; /* Regex filter for diff output */
	2051	+ ReCompiled *pRe = 0; /* Regex filter for diff output */
1965	2052
1966	2053	if( find_option("tk",0,0)!=0 ){
1967	2054	diff_tk("test-diff", 2);
1968	2055	return;
1969	2056	}
1970	2057	find_option("i",0,0);
	2058	+ zRe = find_option("regexp","e",1);
	2059	+ if( zRe ){
	2060	+ const char *zErr = re_compile(&pRe, zRe, 0);
	2061	+ if( zErr ) fossil_fatal("regex error: %s", zErr);
	2062	+ }
1971	2063	diffFlag = diff_options();
1972	2064	verify_all_options();
1973	2065	if( g.argc!=4 ) usage("FILE1 FILE2");
1974	2066	diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
1975	2067	blob_read_from_file(&a, g.argv[2]);
1976	2068	blob_read_from_file(&b, g.argv[3]);
1977	2069	blob_zero(&out);
1978		- text_diff(&a, &b, &out, diffFlag);
	2070	+ text_diff(&a, &b, &out, pRe, diffFlag);
1979	2071	blob_write_to_file(&out, "-");
	2072	+ re_free(pRe);
1980	2073	}
1981	2074
1982	2075	/**************************************************************************
1983	2076	** The basic difference engine is above. What follows is the annotation
1984	2077	** engine. Both are in the same file since they share many components.
1985	2078

	--- src/diff.c
	+++ src/diff.c
	@@ -412,24 +412,45 @@
412	** Return true if two DLine elements are identical.
413	*/
414	static int same_dline(DLine pA, DLine pB){
415	return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416	}


















417
418	/*
419	** Append a single line of context-diff output to pOut.
420	*/
421	static void appendDiffLine(
422	Blob pOut, / Where to write the line of output */
423	char cPrefix, /* One of " ", "+", or "-" */
424	DLine pLine, / The line to be output */
425	int html /* True if generating HTML. False for plain text */

426	){
427	blob_append(pOut, &cPrefix, 1);
428	if( html ){
429	char *zHtml;
430	if( cPrefix=='+' ){


431	blob_append(pOut, "<span class=\"diffadd\">", -1);
432	}else if( cPrefix=='-' ){
433	blob_append(pOut, "<span class=\"diffrm\">", -1);
434	}
435	zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
	@@ -463,21 +484,19 @@
463	blob_append(pOut, " ", 8);
464	}
465	if( html ) blob_append(pOut, "</span>", -1);
466	}
467
468
469	/*
470	** Given a raw diff p[] in which the p->aEdit[] array has been filled
471	** in, compute a context diff into pOut.
472	*/
473	static void contextDiff(
474	DContext p, / The difference */
475	Blob pOut, / Output a context diff to here */
476	int nContext, /* Number of lines of context */
477	int showLn, /* Show line numbers */
478	int html /* Render as HTML */
479	){
480	DLine A; / Left side of the diff */
481	DLine B; / Right side of the diff */
482	int a = 0; /* Index of next line in A[] */
483	int b = 0; /* Index of next line in B[] */
	@@ -488,11 +507,18 @@
488	int na, nb; /* Number of lines shown from A and B */
489	int i, j; /* Loop counters */
490	int m; /* Number of lines to output */
491	int skip; /* Number of lines to skip */
492	int nChunk = 0; /* Number of diff chunks seen so far */




493



494	A = p->aFrom;
495	B = p->aTo;
496	R = p->aEdit;
497	mxr = p->nEdit;
498	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
	@@ -499,10 +525,35 @@
499	for(r=0; r<mxr; r += 3*nr){
500	/* Figure out how many triples to show in a single block */
501	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
502	/* printf("r=%d nr=%d\n", r, nr); */
503

























504	/* For the current block comprising nr triples, figure out
505	** how many lines of A and B are to be displayed
506	*/
507	if( R[r]>nContext ){
508	na = nb = nContext;
	@@ -531,12 +582,13 @@
531	** context diff that contains line numbers, show the separator from
532	** the previous block.
533	*/
534	nChunk++;
535	if( showLn ){
536	if( r==0 ){
537	/* Do not show a top divider */

538	}else if( html ){
539	blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
540	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
541	}else{
542	blob_appendf(pOut, "%.80c\n", '.');
	@@ -559,34 +611,36 @@
559	a += skip;
560	b += skip;
561	m = R[r] - skip;
562	for(j=0; j<m; j++){
563	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
564	appendDiffLine(pOut, ' ', &A[a+j], html);
565	}
566	a += m;
567	b += m;
568
569	/* Show the differences */
570	for(i=0; i<nr; i++){
571	m = R[r+i*3+1];
572	for(j=0; j<m; j++){

573	if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
574	appendDiffLine(pOut, '-', &A[a+j], html);

575	}
576	a += m;
577	m = R[r+i*3+2];
578	for(j=0; j<m; j++){
579	if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
580	appendDiffLine(pOut, '+', &B[b+j], html);
581	}
582	b += m;
583	if( i<nr-1 ){
584	m = R[r+i*3+3];
585	for(j=0; j<m; j++){
586	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
587	appendDiffLine(pOut, ' ', &B[b+j], html);
588	}
589	b += m;
590	a += m;
591	}
592	}
	@@ -595,11 +649,11 @@
595	assert( nr==i );
596	m = R[r+nr*3];
597	if( m>nContext ) m = nContext;
598	for(j=0; j<m; j++){
599	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
600	appendDiffLine(pOut, ' ', &B[b+j], html);
601	}
602	}
603	}
604
605	/*
	@@ -615,10 +669,11 @@
615	const char zStart; / A <span> tag */
616	int iEnd; /* Write </span> prior to character iEnd */
617	int iStart2; /* Write zStart2 prior to character iStart2 */
618	const char zStart2; / A <span> tag */
619	int iEnd2; /* Write </span> prior to character iEnd2 */

620	};
621
622	/*
623	** Flags for sbsWriteText()
624	*/
	@@ -640,13 +695,17 @@
640	int k; /* Cursor position */
641	int needEndSpan = 0;
642	const char *zIn = pLine->z;
643	char *z = &p->zLine[p->n];
644	int w = p->width;




645	for(i=j=k=0; k<w && i<n; i++, k++){
646	char c = zIn[i];
647	if( p->escHtml ){
648	if( i==p->iStart ){
649	int x = strlen(p->zStart);
650	memcpy(z+j, p->zStart, x);
651	j += x;
652	needEndSpan = 1;
	@@ -1196,13 +1255,12 @@
1196	** in, compute a side-by-side diff into pOut.
1197	*/
1198	static void sbsDiff(
1199	DContext p, / The computed diff */
1200	Blob pOut, / Write the results here */
1201	int nContext, /* Number of lines of context around each change */
1202	int width, /* Width of each column of output */
1203	int escHtml /* True to generate HTML output */
1204	){
1205	DLine A; / Left side of the diff */
1206	DLine B; / Right side of the diff */
1207	int a = 0; /* Index of next line in A[] */
1208	int b = 0; /* Index of next line in B[] */
	@@ -1214,16 +1272,20 @@
1214	int i, j; /* Loop counters */
1215	int m, ma, mb;/* Number of lines to output */
1216	int skip; /* Number of lines to skip */
1217	int nChunk = 0; /* Number of chunks of diff output seen so far */
1218	SbsLine s; /* Output line buffer */


1219
1220	memset(&s, 0, sizeof(s));
1221	s.zLine = fossil_malloc( 15*width + 200 );

1222	if( s.zLine==0 ) return;
1223	s.width = width;
1224	s.escHtml = escHtml;

1225	s.iStart = -1;
1226	s.iStart2 = 0;
1227	s.iEnd = -1;
1228	A = p->aFrom;
1229	B = p->aTo;
	@@ -1232,10 +1294,35 @@
1232	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
1233	for(r=0; r<mxr; r += 3*nr){
1234	/* Figure out how many triples to show in a single block */
1235	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
1236	/* printf("r=%d nr=%d\n", r, nr); */

























1237
1238	/* For the current block comprising nr triples, figure out
1239	** how many lines of A and B are to be displayed
1240	*/
1241	if( R[r]>nContext ){
	@@ -1260,20 +1347,21 @@
1260	na += R[r+i*3];
1261	nb += R[r+i*3];
1262	}
1263
1264	/* Draw the separator between blocks */
1265	if( r>0 ){
1266	if( escHtml ){
1267	blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1268	width*2+16, '.');
1269	}else{
1270	blob_appendf(pOut, "%.*c\n", width*2+16, '.');
1271	}
1272	}

1273	nChunk++;
1274	if( escHtml ){
1275	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
1276	}
1277
1278	/* Show the initial common area */
1279	a += skip;
	@@ -1316,11 +1404,11 @@
1316	sbsWriteLineno(&s, a);
1317	s.iStart = 0;
1318	s.zStart = "<span class=\"diffrm\">";
1319	s.iEnd = s.width;
1320	sbsWriteText(&s, &A[a], SBS_PAD);
1321	if( escHtml ){
1322	sbsWrite(&s, " <\n", 6);
1323	}else{
1324	sbsWrite(&s, " <\n", 3);
1325	}
1326	blob_append(pOut, s.zLine, s.n);
	@@ -1338,12 +1426,12 @@
1338	a++;
1339	b++;
1340	}else if( alignment[j]==2 ){
1341	/* Insert one line on the right */
1342	s.n = 0;
1343	sbsWriteSpace(&s, width + 7);
1344	if( escHtml ){
1345	sbsWrite(&s, " > ", 6);
1346	}else{
1347	sbsWrite(&s, " > ", 3);
1348	}
1349	sbsWriteLineno(&s, b);
	@@ -1833,10 +1921,11 @@
1833	*/
1834	int *text_diff(
1835	Blob pA_Blob, / FROM file */
1836	Blob pB_Blob, / TO file */
1837	Blob pOut, / Write diff here if not NULL */

1838	u64 diffFlags /* DIFF_* flags defined above */
1839	){
1840	int ignoreEolWs; /* Ignore whitespace at the end of lines */
1841	int nContext; /* Amount of context to display */
1842	DContext c;
	@@ -1844,11 +1933,10 @@
1844	if( diffFlags & DIFF_INVERT ){
1845	Blob *pTemp = pA_Blob;
1846	pA_Blob = pB_Blob;
1847	pB_Blob = pTemp;
1848	}
1849	nContext = diff_context_lines(diffFlags);
1850	ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
1851
1852	/* Prepare the input files */
1853	memset(&c, 0, sizeof(c));
1854	c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
	@@ -1868,17 +1956,14 @@
1868	diff_all(&c);
1869	if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
1870
1871	if( pOut ){
1872	/* Compute a context or side-by-side diff into pOut */
1873	int escHtml = (diffFlags & DIFF_HTML)!=0;
1874	if( diffFlags & DIFF_SIDEBYSIDE ){
1875	int width = diff_width(diffFlags);
1876	sbsDiff(&c, pOut, nContext, width, escHtml);
1877	}else{
1878	int showLn = (diffFlags & DIFF_LINENO)!=0;
1879	contextDiff(&c, pOut, nContext, showLn, escHtml);
1880	}
1881	fossil_free(c.aFrom);
1882	fossil_free(c.aTo);
1883	fossil_free(c.aEdit);
1884	return 0;
	@@ -1941,11 +2026,11 @@
1941	if( g.argc<4 ) usage("FILE1 FILE2 ...");
1942	blob_read_from_file(&a, g.argv[2]);
1943	for(i=3; i<g.argc; i++){
1944	if( i>3 ) fossil_print("-------------------------------\n");
1945	blob_read_from_file(&b, g.argv[i]);
1946	R = text_diff(&a, &b, 0, diffFlags);
1947	for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
1948	fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
1949	}
1950	/* free(R); */
1951	blob_reset(&b);
	@@ -1960,25 +2045,33 @@
1960	** Print the difference between two files. The usual diff options apply.
1961	*/
1962	void test_diff_cmd(void){
1963	Blob a, b, out;
1964	u64 diffFlag;


1965
1966	if( find_option("tk",0,0)!=0 ){
1967	diff_tk("test-diff", 2);
1968	return;
1969	}
1970	find_option("i",0,0);





1971	diffFlag = diff_options();
1972	verify_all_options();
1973	if( g.argc!=4 ) usage("FILE1 FILE2");
1974	diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
1975	blob_read_from_file(&a, g.argv[2]);
1976	blob_read_from_file(&b, g.argv[3]);
1977	blob_zero(&out);
1978	text_diff(&a, &b, &out, diffFlag);
1979	blob_write_to_file(&out, "-");

1980	}
1981
1982	/**************************************************************************
1983	** The basic difference engine is above. What follows is the annotation
1984	** engine. Both are in the same file since they share many components.
1985

	--- src/diff.c
	+++ src/diff.c
	@@ -412,24 +412,45 @@
412	** Return true if two DLine elements are identical.
413	*/
414	static int same_dline(DLine pA, DLine pB){
415	return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416	}
417
418	/*
419	** Return true if the regular expression *pRe matches any of the
420	** N dlines
421	*/
422	static int re_dline_match(
423	ReCompiled pRe, / The regular expression to be matched */
424	DLine aDLine, / First of N DLines to compare against */
425	int N /* Number of DLines to check */
426	){
427	while( N-- ){
428	if( re_exec(pRe, aDLine->z, LENGTH(aDLine)) ){
429	return 1;
430	}
431	aDLine++;
432	}
433	return 0;
434	}
435
436	/*
437	** Append a single line of context-diff output to pOut.
438	*/
439	static void appendDiffLine(
440	Blob pOut, / Where to write the line of output */
441	char cPrefix, /* One of " ", "+", or "-" */
442	DLine pLine, / The line to be output */
443	int html, /* True if generating HTML. False for plain text */
444	ReCompiled pRe / Colorize only if line matches this Regex */
445	){
446	blob_append(pOut, &cPrefix, 1);
447	if( html ){
448	char *zHtml;
449	if( pRe && re_dline_match(pRe, pLine, 1)==0 ){
450	cPrefix = ' ';
451	}else if( cPrefix=='+' ){
452	blob_append(pOut, "<span class=\"diffadd\">", -1);
453	}else if( cPrefix=='-' ){
454	blob_append(pOut, "<span class=\"diffrm\">", -1);
455	}
456	zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
	@@ -463,21 +484,19 @@
484	blob_append(pOut, " ", 8);
485	}
486	if( html ) blob_append(pOut, "</span>", -1);
487	}
488

489	/*
490	** Given a raw diff p[] in which the p->aEdit[] array has been filled
491	** in, compute a context diff into pOut.
492	*/
493	static void contextDiff(
494	DContext p, / The difference */
495	Blob pOut, / Output a context diff to here */
496	ReCompiled pRe, / Only show changes that match this regex */
497	u64 diffFlags /* Flags controlling the diff format */

498	){
499	DLine A; / Left side of the diff */
500	DLine B; / Right side of the diff */
501	int a = 0; /* Index of next line in A[] */
502	int b = 0; /* Index of next line in B[] */
	@@ -488,11 +507,18 @@
507	int na, nb; /* Number of lines shown from A and B */
508	int i, j; /* Loop counters */
509	int m; /* Number of lines to output */
510	int skip; /* Number of lines to skip */
511	int nChunk = 0; /* Number of diff chunks seen so far */
512	int nContext; /* Number of lines of context */
513	int showLn; /* Show line numbers */
514	int html; /* Render as HTML */
515	int showDivider = 0; /* True to show the divider between diff blocks */
516
517	nContext = diff_context_lines(diffFlags);
518	showLn = (diffFlags & DIFF_LINENO)!=0;
519	html = (diffFlags & DIFF_HTML)!=0;
520	A = p->aFrom;
521	B = p->aTo;
522	R = p->aEdit;
523	mxr = p->nEdit;
524	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
	@@ -499,10 +525,35 @@
525	for(r=0; r<mxr; r += 3*nr){
526	/* Figure out how many triples to show in a single block */
527	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
528	/* printf("r=%d nr=%d\n", r, nr); */
529
530	/* If there is a regex, skip this block (generate no diff output)
531	** if the regex matches or does not match both insert and delete.
532	** Only display the block if one side matches but the other side does
533	** not.
534	*/
535	if( pRe ){
536	int hideBlock = 1;
537	int xa = a, xb = b;
538	for(i=0; hideBlock && i<nr; i++){
539	int c1, c2;
540	xa += R[r+i*3];
541	xb += R[r+i*3];
542	c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
543	c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
544	hideBlock = c1==c2;
545	xa += R[r+i*3+1];
546	xb += R[r+i*3+2];
547	}
548	if( hideBlock ){
549	a = xa;
550	b = xb;
551	continue;
552	}
553	}
554
555	/* For the current block comprising nr triples, figure out
556	** how many lines of A and B are to be displayed
557	*/
558	if( R[r]>nContext ){
559	na = nb = nContext;
	@@ -531,12 +582,13 @@
582	** context diff that contains line numbers, show the separator from
583	** the previous block.
584	*/
585	nChunk++;
586	if( showLn ){
587	if( !showDivider ){
588	/* Do not show a top divider */
589	showDivider = 1;
590	}else if( html ){
591	blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
592	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
593	}else{
594	blob_appendf(pOut, "%.80c\n", '.');
	@@ -559,34 +611,36 @@
611	a += skip;
612	b += skip;
613	m = R[r] - skip;
614	for(j=0; j<m; j++){
615	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
616	appendDiffLine(pOut, ' ', &A[a+j], html, 0);
617	}
618	a += m;
619	b += m;
620
621	/* Show the differences */
622	for(i=0; i<nr; i++){
623	m = R[r+i*3+1];
624	for(j=0; j<m; j++){
625	char cMark = '-';
626	if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
627	if( pRe && re_dline_match(pRe, &A[a+j], 1)==0 ) cMark = ' ';
628	appendDiffLine(pOut, '-', &A[a+j], html, pRe);
629	}
630	a += m;
631	m = R[r+i*3+2];
632	for(j=0; j<m; j++){
633	if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
634	appendDiffLine(pOut, '+', &B[b+j], html, pRe);
635	}
636	b += m;
637	if( i<nr-1 ){
638	m = R[r+i*3+3];
639	for(j=0; j<m; j++){
640	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
641	appendDiffLine(pOut, ' ', &B[b+j], html, 0);
642	}
643	b += m;
644	a += m;
645	}
646	}
	@@ -595,11 +649,11 @@
649	assert( nr==i );
650	m = R[r+nr*3];
651	if( m>nContext ) m = nContext;
652	for(j=0; j<m; j++){
653	if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
654	appendDiffLine(pOut, ' ', &B[b+j], html, 0);
655	}
656	}
657	}
658
659	/*
	@@ -615,10 +669,11 @@
669	const char *zStart; /* A <span> tag */
670	int iEnd; /* Write </span> prior to character iEnd */
671	int iStart2; /* Write zStart2 prior to character iStart2 */
672	const char *zStart2; /* A <span> tag */
673	int iEnd2; /* Write </span> prior to character iEnd2 */
674	ReCompiled *pRe; /* Only colorize matching lines, if not NULL */
675	};
676
677	/*
678	** Flags for sbsWriteText()
679	*/
	@@ -640,13 +695,17 @@
695	int k; /* Cursor position */
696	int needEndSpan = 0;
697	const char *zIn = pLine->z;
698	char *z = &p->zLine[p->n];
699	int w = p->width;
700	int colorize = p->escHtml;
701	if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){
702	colorize = 0;
703	}
704	for(i=j=k=0; k<w && i<n; i++, k++){
705	char c = zIn[i];
706	if( colorize ){
707	if( i==p->iStart ){
708	int x = strlen(p->zStart);
709	memcpy(z+j, p->zStart, x);
710	j += x;
711	needEndSpan = 1;
	@@ -1196,13 +1255,12 @@
1255	** in, compute a side-by-side diff into pOut.
1256	*/
1257	static void sbsDiff(
1258	DContext *p, /* The computed diff */
1259	Blob *pOut, /* Write the results here */
1260	ReCompiled *pRe, /* Only show changes that match this regex */
1261	u64 diffFlags /* Flags controlling the diff */

1262	){
1263	DLine *A; /* Left side of the diff */
1264	DLine *B; /* Right side of the diff */
1265	int a = 0; /* Index of next line in A[] */
1266	int b = 0; /* Index of next line in B[] */
	@@ -1214,16 +1272,20 @@
1272	int i, j; /* Loop counters */
1273	int m, ma, mb;/* Number of lines to output */
1274	int skip; /* Number of lines to skip */
1275	int nChunk = 0; /* Number of chunks of diff output seen so far */
1276	SbsLine s; /* Output line buffer */
1277	int nContext; /* Lines of context above and below each change */
1278	int showDivider = 0; /* True to show the divider */
1279
1280	memset(&s, 0, sizeof(s));
1281	s.width = diff_width(diffFlags);
1282	s.zLine = fossil_malloc( 15*s.width + 200 );
1283	if( s.zLine==0 ) return;
1284	nContext = diff_context_lines(diffFlags);
1285	s.escHtml = (diffFlags & DIFF_HTML)!=0;
1286	s.pRe = pRe;
1287	s.iStart = -1;
1288	s.iStart2 = 0;
1289	s.iEnd = -1;
1290	A = p->aFrom;
1291	B = p->aTo;
	@@ -1232,10 +1294,35 @@
1294	while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
1295	for(r=0; r<mxr; r += 3*nr){
1296	/* Figure out how many triples to show in a single block */
1297	for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
1298	/* printf("r=%d nr=%d\n", r, nr); */
1299
1300	/* If there is a regex, skip this block (generate no diff output)
1301	** if the regex matches or does not match both insert and delete.
1302	** Only display the block if one side matches but the other side does
1303	** not.
1304	*/
1305	if( pRe ){
1306	int hideBlock = 1;
1307	int xa = a, xb = b;
1308	for(i=0; hideBlock && i<nr; i++){
1309	int c1, c2;
1310	xa += R[r+i*3];
1311	xb += R[r+i*3];
1312	c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
1313	c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
1314	hideBlock = c1==c2;
1315	xa += R[r+i*3+1];
1316	xb += R[r+i*3+2];
1317	}
1318	if( hideBlock ){
1319	a = xa;
1320	b = xb;
1321	continue;
1322	}
1323	}
1324
1325	/* For the current block comprising nr triples, figure out
1326	** how many lines of A and B are to be displayed
1327	*/
1328	if( R[r]>nContext ){
	@@ -1260,20 +1347,21 @@
1347	na += R[r+i*3];
1348	nb += R[r+i*3];
1349	}
1350
1351	/* Draw the separator between blocks */
1352	if( showDivider ){
1353	if( s.escHtml ){
1354	blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1355	s.width*2+16, '.');
1356	}else{
1357	blob_appendf(pOut, "%.*c\n", s.width*2+16, '.');
1358	}
1359	}
1360	showDivider = 1;
1361	nChunk++;
1362	if( s.escHtml ){
1363	blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
1364	}
1365
1366	/* Show the initial common area */
1367	a += skip;
	@@ -1316,11 +1404,11 @@
1404	sbsWriteLineno(&s, a);
1405	s.iStart = 0;
1406	s.zStart = "<span class=\"diffrm\">";
1407	s.iEnd = s.width;
1408	sbsWriteText(&s, &A[a], SBS_PAD);
1409	if( s.escHtml ){
1410	sbsWrite(&s, " &lt;\n", 6);
1411	}else{
1412	sbsWrite(&s, " <\n", 3);
1413	}
1414	blob_append(pOut, s.zLine, s.n);
	@@ -1338,12 +1426,12 @@
1426	a++;
1427	b++;
1428	}else if( alignment[j]==2 ){
1429	/* Insert one line on the right */
1430	s.n = 0;
1431	sbsWriteSpace(&s, s.width + 7);
1432	if( s.escHtml ){
1433	sbsWrite(&s, " &gt; ", 6);
1434	}else{
1435	sbsWrite(&s, " > ", 3);
1436	}
1437	sbsWriteLineno(&s, b);
	@@ -1833,10 +1921,11 @@
1921	*/
1922	int *text_diff(
1923	Blob *pA_Blob, /* FROM file */
1924	Blob *pB_Blob, /* TO file */
1925	Blob *pOut, /* Write diff here if not NULL */
1926	ReCompiled *pRe, /* Only output changes where this Regexp matches */
1927	u64 diffFlags /* DIFF_* flags defined above */
1928	){
1929	int ignoreEolWs; /* Ignore whitespace at the end of lines */
1930	int nContext; /* Amount of context to display */
1931	DContext c;
	@@ -1844,11 +1933,10 @@
1933	if( diffFlags & DIFF_INVERT ){
1934	Blob *pTemp = pA_Blob;
1935	pA_Blob = pB_Blob;
1936	pB_Blob = pTemp;
1937	}

1938	ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
1939
1940	/* Prepare the input files */
1941	memset(&c, 0, sizeof(c));
1942	c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
	@@ -1868,17 +1956,14 @@
1956	diff_all(&c);
1957	if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
1958
1959	if( pOut ){
1960	/* Compute a context or side-by-side diff into pOut */

1961	if( diffFlags & DIFF_SIDEBYSIDE ){
1962	sbsDiff(&c, pOut, pRe, diffFlags);

1963	}else{
1964	contextDiff(&c, pOut, pRe, diffFlags);

1965	}
1966	fossil_free(c.aFrom);
1967	fossil_free(c.aTo);
1968	fossil_free(c.aEdit);
1969	return 0;
	@@ -1941,11 +2026,11 @@
2026	if( g.argc<4 ) usage("FILE1 FILE2 ...");
2027	blob_read_from_file(&a, g.argv[2]);
2028	for(i=3; i<g.argc; i++){
2029	if( i>3 ) fossil_print("-------------------------------\n");
2030	blob_read_from_file(&b, g.argv[i]);
2031	R = text_diff(&a, &b, 0, 0, diffFlags);
2032	for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
2033	fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
2034	}
2035	/* free(R); */
2036	blob_reset(&b);
	@@ -1960,25 +2045,33 @@
2045	** Print the difference between two files. The usual diff options apply.
2046	*/
2047	void test_diff_cmd(void){
2048	Blob a, b, out;
2049	u64 diffFlag;
2050	const char *zRe; /* Regex filter for diff output */
2051	ReCompiled *pRe = 0; /* Regex filter for diff output */
2052
2053	if( find_option("tk",0,0)!=0 ){
2054	diff_tk("test-diff", 2);
2055	return;
2056	}
2057	find_option("i",0,0);
2058	zRe = find_option("regexp","e",1);
2059	if( zRe ){
2060	const char *zErr = re_compile(&pRe, zRe, 0);
2061	if( zErr ) fossil_fatal("regex error: %s", zErr);
2062	}
2063	diffFlag = diff_options();
2064	verify_all_options();
2065	if( g.argc!=4 ) usage("FILE1 FILE2");
2066	diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
2067	blob_read_from_file(&a, g.argv[2]);
2068	blob_read_from_file(&b, g.argv[3]);
2069	blob_zero(&out);
2070	text_diff(&a, &b, &out, pRe, diffFlag);
2071	blob_write_to_file(&out, "-");
2072	re_free(pRe);
2073	}
2074
2075	/**************************************************************************
2076	** The basic difference engine is above. What follows is the annotation
2077	** engine. Both are in the same file since they share many components.
2078

M src/diffcmd.c

+2 -2

		--- src/diffcmd.c
		+++ src/diffcmd.c
		@@ -109,11 +109,11 @@
109	109	if( blob_compare(pFile1, &file2) ){
110	110	fossil_print("CHANGED %s\n", zName);
111	111	}
112	112	}else{
113	113	blob_zero(&out);
114		- text_diff(pFile1, &file2, &out, diffFlags);
	114	+ text_diff(pFile1, &file2, &out, 0, diffFlags);
115	115	if( blob_size(&out) ){
116	116	diff_print_filenames(zName, zName2, diffFlags);
117	117	fossil_print("%s\n", blob_str(&out));
118	118	}
119	119	blob_reset(&out);
		@@ -210,11 +210,11 @@
210	210	if( diffFlags & DIFF_BRIEF ) return;
211	211	if( zDiffCmd==0 ){
212	212	Blob out; /* Diff output text */
213	213
214	214	blob_zero(&out);
215		- text_diff(pFile1, pFile2, &out, diffFlags);
	215	+ text_diff(pFile1, pFile2, &out, 0, diffFlags);
216	216	diff_print_filenames(zName, zName, diffFlags);
217	217	fossil_print("%s\n", blob_str(&out));
218	218
219	219	/* Release memory resources */
220	220	blob_reset(&out);
221	221

	--- src/diffcmd.c
	+++ src/diffcmd.c
	@@ -109,11 +109,11 @@
109	if( blob_compare(pFile1, &file2) ){
110	fossil_print("CHANGED %s\n", zName);
111	}
112	}else{
113	blob_zero(&out);
114	text_diff(pFile1, &file2, &out, diffFlags);
115	if( blob_size(&out) ){
116	diff_print_filenames(zName, zName2, diffFlags);
117	fossil_print("%s\n", blob_str(&out));
118	}
119	blob_reset(&out);
	@@ -210,11 +210,11 @@
210	if( diffFlags & DIFF_BRIEF ) return;
211	if( zDiffCmd==0 ){
212	Blob out; /* Diff output text */
213
214	blob_zero(&out);
215	text_diff(pFile1, pFile2, &out, diffFlags);
216	diff_print_filenames(zName, zName, diffFlags);
217	fossil_print("%s\n", blob_str(&out));
218
219	/* Release memory resources */
220	blob_reset(&out);
221

	--- src/diffcmd.c
	+++ src/diffcmd.c
	@@ -109,11 +109,11 @@
109	if( blob_compare(pFile1, &file2) ){
110	fossil_print("CHANGED %s\n", zName);
111	}
112	}else{
113	blob_zero(&out);
114	text_diff(pFile1, &file2, &out, 0, diffFlags);
115	if( blob_size(&out) ){
116	diff_print_filenames(zName, zName2, diffFlags);
117	fossil_print("%s\n", blob_str(&out));
118	}
119	blob_reset(&out);
	@@ -210,11 +210,11 @@
210	if( diffFlags & DIFF_BRIEF ) return;
211	if( zDiffCmd==0 ){
212	Blob out; /* Diff output text */
213
214	blob_zero(&out);
215	text_diff(pFile1, pFile2, &out, 0, diffFlags);
216	diff_print_filenames(zName, zName, diffFlags);
217	fossil_print("%s\n", blob_str(&out));
218
219	/* Release memory resources */
220	blob_reset(&out);
221

M src/info.c

+3 -3

		--- src/info.c
		+++ src/info.c
		@@ -307,16 +307,16 @@
307	307	}else{
308	308	blob_zero(&to);
309	309	}
310	310	blob_zero(&out);
311	311	if( diffFlags & DIFF_SIDEBYSIDE ){
312		- text_diff(&from, &to, &out, diffFlags \| DIFF_HTML);
	312	+ text_diff(&from, &to, &out, 0, diffFlags \| DIFF_HTML);
313	313	@ <div class="sbsdiff">
314	314	@ %s(blob_str(&out))
315	315	@ </div>
316	316	}else{
317		- text_diff(&from, &to, &out, diffFlags \| DIFF_LINENO \| DIFF_HTML);
	317	+ text_diff(&from, &to, &out, 0, diffFlags \| DIFF_LINENO \| DIFF_HTML);
318	318	@ <div class="udiff">
319	319	@ %s(blob_str(&out))
320	320	@ </div>
321	321	}
322	322	blob_reset(&from);
		@@ -1298,11 +1298,11 @@
1298	1298	zStyle = "udiff";
1299	1299	}
1300	1300	}
1301	1301	content_get(v1, &c1);
1302	1302	content_get(v2, &c2);
1303		- text_diff(&c1, &c2, pOut, diffFlags);
	1303	+ text_diff(&c1, &c2, pOut, 0, diffFlags);
1304	1304	blob_reset(&c1);
1305	1305	blob_reset(&c2);
1306	1306	if( !isPatch ){
1307	1307	style_header("Diff");
1308	1308	style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
1309	1309

	--- src/info.c
	+++ src/info.c
	@@ -307,16 +307,16 @@
307	}else{
308	blob_zero(&to);
309	}
310	blob_zero(&out);
311	if( diffFlags & DIFF_SIDEBYSIDE ){
312	text_diff(&from, &to, &out, diffFlags \| DIFF_HTML);
313	@ <div class="sbsdiff">
314	@ %s(blob_str(&out))
315	@ </div>
316	}else{
317	text_diff(&from, &to, &out, diffFlags \| DIFF_LINENO \| DIFF_HTML);
318	@ <div class="udiff">
319	@ %s(blob_str(&out))
320	@ </div>
321	}
322	blob_reset(&from);
	@@ -1298,11 +1298,11 @@
1298	zStyle = "udiff";
1299	}
1300	}
1301	content_get(v1, &c1);
1302	content_get(v2, &c2);
1303	text_diff(&c1, &c2, pOut, diffFlags);
1304	blob_reset(&c1);
1305	blob_reset(&c2);
1306	if( !isPatch ){
1307	style_header("Diff");
1308	style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
1309

	--- src/info.c
	+++ src/info.c
	@@ -307,16 +307,16 @@
307	}else{
308	blob_zero(&to);
309	}
310	blob_zero(&out);
311	if( diffFlags & DIFF_SIDEBYSIDE ){
312	text_diff(&from, &to, &out, 0, diffFlags \| DIFF_HTML);
313	@ <div class="sbsdiff">
314	@ %s(blob_str(&out))
315	@ </div>
316	}else{
317	text_diff(&from, &to, &out, 0, diffFlags \| DIFF_LINENO \| DIFF_HTML);
318	@ <div class="udiff">
319	@ %s(blob_str(&out))
320	@ </div>
321	}
322	blob_reset(&from);
	@@ -1298,11 +1298,11 @@
1298	zStyle = "udiff";
1299	}
1300	}
1301	content_get(v1, &c1);
1302	content_get(v2, &c2);
1303	text_diff(&c1, &c2, pOut, 0, diffFlags);
1304	blob_reset(&c1);
1305	blob_reset(&c2);
1306	if( !isPatch ){
1307	style_header("Diff");
1308	style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
1309

M src/json_diff.c

+1 -1

		--- src/json_diff.c
		+++ src/json_diff.c
		@@ -58,11 +58,11 @@
58	58	return NULL;
59	59	}
60	60	content_get(fromid, &from);
61	61	content_get(toid, &to);
62	62	blob_zero(&out);
63		- text_diff(&from, &to, &out, flags);
	63	+ text_diff(&from, &to, &out, 0, flags);
64	64	blob_reset(&from);
65	65	blob_reset(&to);
66	66	outLen = blob_size(&out);
67	67	if(outLen>=0){
68	68	rc = cson_value_new_string(blob_buffer(&out),
69	69

	--- src/json_diff.c
	+++ src/json_diff.c
	@@ -58,11 +58,11 @@
58	return NULL;
59	}
60	content_get(fromid, &from);
61	content_get(toid, &to);
62	blob_zero(&out);
63	text_diff(&from, &to, &out, flags);
64	blob_reset(&from);
65	blob_reset(&to);
66	outLen = blob_size(&out);
67	if(outLen>=0){
68	rc = cson_value_new_string(blob_buffer(&out),
69

	--- src/json_diff.c
	+++ src/json_diff.c
	@@ -58,11 +58,11 @@
58	return NULL;
59	}
60	content_get(fromid, &from);
61	content_get(toid, &to);
62	blob_zero(&out);
63	text_diff(&from, &to, &out, 0, flags);
64	blob_reset(&from);
65	blob_reset(&to);
66	outLen = blob_size(&out);
67	if(outLen>=0){
68	rc = cson_value_new_string(blob_buffer(&out),
69

M src/json_wiki.c

+1 -1

		--- src/json_wiki.c
		+++ src/json_wiki.c
		@@ -543,11 +543,11 @@
543	543	blob_init(&w1, pW1->zWiki, -1);
544	544	blob_zero(&w2);
545	545	blob_init(&w2, pW2->zWiki, -1);
546	546	blob_zero(&d);
547	547	diffFlags = DIFF_IGNORE_EOLWS \| DIFF_INLINE;
548		- text_diff(&w2, &w1, &d, diffFlags);
	548	+ text_diff(&w2, &w1, &d, 0, diffFlags);
549	549	blob_reset(&w1);
550	550	blob_reset(&w2);
551	551
552	552	pay = cson_new_object();
553	553
554	554

	--- src/json_wiki.c
	+++ src/json_wiki.c
	@@ -543,11 +543,11 @@
543	blob_init(&w1, pW1->zWiki, -1);
544	blob_zero(&w2);
545	blob_init(&w2, pW2->zWiki, -1);
546	blob_zero(&d);
547	diffFlags = DIFF_IGNORE_EOLWS \| DIFF_INLINE;
548	text_diff(&w2, &w1, &d, diffFlags);
549	blob_reset(&w1);
550	blob_reset(&w2);
551
552	pay = cson_new_object();
553
554

	--- src/json_wiki.c
	+++ src/json_wiki.c
	@@ -543,11 +543,11 @@
543	blob_init(&w1, pW1->zWiki, -1);
544	blob_zero(&w2);
545	blob_init(&w2, pW2->zWiki, -1);
546	blob_zero(&d);
547	diffFlags = DIFF_IGNORE_EOLWS \| DIFF_INLINE;
548	text_diff(&w2, &w1, &d, 0, diffFlags);
549	blob_reset(&w1);
550	blob_reset(&w2);
551
552	pay = cson_new_object();
553
554

M src/merge3.c

+2 -2

		--- src/merge3.c
		+++ src/merge3.c
		@@ -175,12 +175,12 @@
175	175	** is the number of lines of text to copy directly from the pivot,
176	176	** the second integer is the number of lines of text to omit from the
177	177	** pivot, and the third integer is the number of lines of text that are
178	178	** inserted. The edit array ends with a triple of 0,0,0.
179	179	*/
180		- aC1 = text_diff(pPivot, pV1, 0, 0);
181		- aC2 = text_diff(pPivot, pV2, 0, 0);
	180	+ aC1 = text_diff(pPivot, pV1, 0, 0, 0);
	181	+ aC2 = text_diff(pPivot, pV2, 0, 0, 0);
182	182	if( aC1==0 \|\| aC2==0 ){
183	183	free(aC1);
184	184	free(aC2);
185	185	return -1;
186	186	}
187	187

	--- src/merge3.c
	+++ src/merge3.c
	@@ -175,12 +175,12 @@
175	** is the number of lines of text to copy directly from the pivot,
176	** the second integer is the number of lines of text to omit from the
177	** pivot, and the third integer is the number of lines of text that are
178	** inserted. The edit array ends with a triple of 0,0,0.
179	*/
180	aC1 = text_diff(pPivot, pV1, 0, 0);
181	aC2 = text_diff(pPivot, pV2, 0, 0);
182	if( aC1==0 \|\| aC2==0 ){
183	free(aC1);
184	free(aC2);
185	return -1;
186	}
187

	--- src/merge3.c
	+++ src/merge3.c
	@@ -175,12 +175,12 @@
175	** is the number of lines of text to copy directly from the pivot,
176	** the second integer is the number of lines of text to omit from the
177	** pivot, and the third integer is the number of lines of text that are
178	** inserted. The edit array ends with a triple of 0,0,0.
179	*/
180	aC1 = text_diff(pPivot, pV1, 0, 0, 0);
181	aC2 = text_diff(pPivot, pV2, 0, 0, 0);
182	if( aC1==0 \|\| aC2==0 ){
183	free(aC1);
184	free(aC2);
185	return -1;
186	}
187

M src/regexp.c

+87 -64

		--- src/regexp.c
		+++ src/regexp.c
		@@ -94,19 +94,27 @@
94	94	unsigned nState; /* Number of current states */
95	95	ReStateNumber *aState; /* Current states */
96	96	} ReStateSet;
97	97
98	98	#if INTERFACE
	99	+/* An input string read one character at a time.
	100	+*/
	101	+struct ReInput {
	102	+ const unsigned char *z; /* All text */
	103	+ int i; /* Next byte to read */
	104	+ int mx; /* EOF when i>=mx */
	105	+};
	106	+
99	107	/* A compiled NFA (or an NFA that is in the process of being compiled) is
100	108	** an instance of the following object.
101	109	*/
102	110	struct ReCompiled {
103		- const unsigned char *zIn; /* Regular expression text */
	111	+ ReInput sIn; /* Regular expression text */
104	112	const char *zErr; /* Error message to return */
105	113	char *aOp; /* Operators for the virtual machine */
106	114	int *aArg; /* Arguments to each operator */
107		- unsigned (*xNextChar)(const unsigned char**); /* Next character function */
	115	+ unsigned (*xNextChar)(ReInput*); /* Next character function */
108	116	char zInit[12]; /* Initial text to match */
109	117	int nInit; /* Number of characters in zInit */
110	118	unsigned nState; /* Number of entries in aOp[] and aArg[] */
111	119	unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
112	120	};
		@@ -122,37 +130,37 @@
122	130	/* Extract the next unicode character from *pzIn and return it. Advance
123	131	** *pzIn to the first byte past the end of the character returned. To
124	132	** be clear: this routine converts utf8 to unicode. This routine is
125	133	** optimized for the common case where the next character is a single byte.
126	134	*/
127		-static unsigned re_next_char(const unsigned char **pzIn){
128		- unsigned c = **pzIn;
129		- if( c>0 ) (*pzIn)++;
	135	+static unsigned re_next_char(ReInput *p){
	136	+ unsigned c;
	137	+ if( p->i>=p->mx ) return 0;
	138	+ c = p->z[p->i++];
130	139	if( c>0x80 ){
131		- if( (c&0xe0)==0xc0 && ((*pzIn)[0]&0xc0)==0x80 ){
132		- c = (c&0x1f)<<6 \| ((*pzIn)[0]&0x3f);
133		- (*pzIn)++;
	140	+ if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){
	141	+ c = (c&0x1f)<<6 \| (p->z[p->i++]&0x3f);
134	142	if( c<0x80 ) c = 0xfffd;
135		- }else if( (c&0xf0)==0xe0 && ((*pzIn)[0]&0xc0)==0x80
136		- && ((*pzIn)[1]&0xc0)==0x80 ){
137		- c = (c&0x0f)<<12 | (((*pzIn)[0]&0x3f)<<6) | ((*pzIn)[1]&0x3f);
138		- *pzIn += 2;
	143	+ }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
	144	+ && (p->z[p->i+1]&0xc0)==0x80 ){
	145	+ c = (c&0x0f)<<12 \| ((p->z[p->i]&0x3f)<<6) \| (p->z[p->i+1]&0x3f);
	146	+ p->i += 2;
139	147	if( c<0x3ff \|\| (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
140		- }else if( (c&0xf8)==0xf0 && ((*pzIn)[0]&0xc0)==0x80
141		- && ((*pzIn)[1]&0xc0)==0x80 && ((*pzIn)[2]&0xc0)==0x80 ){
142		- c = (c&0x07)<<18 | (((*pzIn)[0]&0x3f)<<12) | (((*pzIn)[1]&0x3f)<<6)
143		- | ((*pzIn)[2]&0x3f);
144		- *pzIn += 3;
	148	+ }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
	149	+ && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
	150	+ c = (c&0x07)<<18 \| ((p->z[p->i]&0x3f)<<12) \| ((p->z[p->i+1]&0x3f)<<6)
	151	+ \| (p->z[p->i+2]&0x3f);
	152	+ p->i += 3;
145	153	if( c<0xffff ) c = 0xfffd;
146	154	}else{
147	155	c = 0xfffd;
148	156	}
149	157	}
150	158	return c;
151	159	}
152		-static unsigned re_next_char_nocase(const unsigned char **pzIn){
153		- unsigned c = re_next_char(pzIn);
	160	+static unsigned re_next_char_nocase(ReInput *p){
	161	+ unsigned c = re_next_char(p);
154	162	return unicode_fold(c,1);
155	163	}
156	164
157	165	/* Return true if c is a perl "word" character: [A-Za-z0-9_] */
158	166	static int re_word_char(int c){
		@@ -170,26 +178,32 @@
170	178	}
171	179
172	180	/* Run a compiled regular expression on the zero-terminated input
173	181	** string zIn[]. Return true on a match and false if there is no match.
174	182	*/
175		-int re_exec(ReCompiled *pRe, const unsigned char *zIn){
	183	+int re_exec(ReCompiled *pRe, const unsigned char *zIn, int nIn){
176	184	ReStateSet aStateSet[2], *pThis, *pNext;
177	185	ReStateNumber aSpace[100];
178	186	ReStateNumber *pToFree;
179	187	unsigned int i = 0;
180	188	unsigned int iSwap = 0;
181	189	int c = RE_EOF+1;
182	190	int cPrev = 0;
183	191	int rc = 0;
184		-
	192	+ ReInput in;
	193	+
	194	+ in.z = zIn;
	195	+ in.i = 0;
	196	+ in.mx = nIn>=0 ? nIn : strlen(zIn);
185	197	if( pRe->nInit ){
186	198	unsigned char x = pRe->zInit[0];
187		- while( zIn[0] && (zIn[0]!=x \|\| memcmp(zIn, pRe->zInit, pRe->nInit)!=0) ){
188		- zIn++;
	199	+ while( in.i+pRe->nInit<in.mx
	200	+ && (zIn[in.i]!=x \|\| memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
	201	+ ){
	202	+ in.i++;
189	203	}
190		- if( zIn[0]==0 ) return 0;
	204	+ if( in.i+pRe->nInit>=in.mx ) return 0;
191	205	}
192	206	if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
193	207	pToFree = 0;
194	208	aStateSet[0].aState = aSpace;
195	209	}else{
		@@ -201,11 +215,11 @@
201	215	pNext = &aStateSet[1];
202	216	pNext->nState = 0;
203	217	re_add_state(pNext, 0);
204	218	while( c!=RE_EOF && pNext->nState>0 ){
205	219	cPrev = c;
206		- c = pRe->xNextChar(&zIn);
	220	+ c = pRe->xNextChar(&in);
207	221	pThis = pNext;
208	222	pNext = &aStateSet[iSwap];
209	223	iSwap = 1 - iSwap;
210	224	pNext->nState = 0;
211	225	for(i=0; i<pThis->nState; i++){
		@@ -370,42 +384,50 @@
370	384	*/
371	385	static unsigned re_esc_char(ReCompiled *p){
372	386	static const char zEsc[] = "afnrtv\\()*.+?[$^{\|}]";
373	387	static const char zTrans[] = "\a\f\n\r\t\v";
374	388	int i, v = 0;
375		- char c = p->zIn[0];
376		- if( c=='u' ){
	389	+ char c;
	390	+ if( p->sIn.i>=p->sIn.mx ) return 0;
	391	+ c = p->sIn.z[0];
	392	+ if( c=='u' && p->sIn.i+5<p->sIn.mx ){
377	393	v = 0;
378		- if( re_hex(p->zIn[1],&v)
379		- && re_hex(p->zIn[2],&v)
380		- && re_hex(p->zIn[3],&v)
381		- && re_hex(p->zIn[4],&v)
	394	+ const unsigned char *zIn = p->sIn.z + p->sIn.i;
	395	+ if( re_hex(zIn[1],&v)
	396	+ && re_hex(zIn[2],&v)
	397	+ && re_hex(zIn[3],&v)
	398	+ && re_hex(zIn[4],&v)
382	399	){
383		- p->zIn += 5;
	400	+ p->sIn.i += 5;
384	401	return v;
385	402	}
386	403	}
387	404	if( c=='x' ){
388	405	v = 0;
389		- for(i=1; re_hex(p->zIn[i], &v); i++){}
	406	+ for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
390	407	if( i>1 ){
391		- p->zIn += i;
	408	+ p->sIn.i += i;
392	409	return v;
393	410	}
394	411	}
395	412	for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
396	413	if( zEsc[i] ){
397	414	if( i<6 ) c = zTrans[i];
398		- p->zIn++;
	415	+ p->sIn.i++;
399	416	}else{
400	417	p->zErr = "unknown \\ escape";
401	418	}
402	419	return c;
403	420	}
404	421
405	422	/* Forward declaration */
406	423	static const char *re_subcompile_string(ReCompiled*);
	424	+
	425	+/* Peek at the next byte of input */
	426	+static unsigned char rePeek(ReCompiled *p){
	427	+ return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;
	428	+}
407	429
408	430	/* Compile RE text into a sequence of opcodes. Continue up to the
409	431	** first unmatched ")" character, then return. If an error is found,
410	432	** return a pointer to the error message string.
411	433	*/
		@@ -413,15 +435,15 @@
413	435	const char *zErr;
414	436	int iStart, iEnd, iGoto;
415	437	iStart = p->nState;
416	438	zErr = re_subcompile_string(p);
417	439	if( zErr ) return zErr;
418		- while( p->zIn[0]=='\|' ){
	440	+ while( rePeek(p)=='\|' ){
419	441	iEnd = p->nState;
420	442	re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
421	443	iGoto = re_append(p, RE_OP_GOTO, 0);
422		- p->zIn++;
	444	+ p->sIn.i++;
423	445	zErr = re_subcompile_string(p);
424	446	if( zErr ) return zErr;
425	447	p->aArg[iGoto] = p->nState - iGoto;
426	448	}
427	449	return 0;
		@@ -434,30 +456,30 @@
434	456	static const char *re_subcompile_string(ReCompiled *p){
435	457	int iPrev = -1;
436	458	int iStart;
437	459	unsigned c;
438	460	const char *zErr;
439		- while( (c = p->xNextChar(&p->zIn))!=0 ){
	461	+ while( (c = p->xNextChar(&p->sIn))!=0 ){
440	462	iStart = p->nState;
441	463	switch( c ){
442	464	case '\|':
443	465	case '$':
444	466	case ')': {
445		- p->zIn--;
	467	+ p->sIn.i--;
446	468	return 0;
447	469	}
448	470	case '(': {
449	471	zErr = re_subcompile_re(p);
450	472	if( zErr ) return zErr;
451		- if( p->zIn[0]!=')' ) return "unmatched '('";
452		- p->zIn++;
	473	+ if( rePeek(p)!=')' ) return "unmatched '('";
	474	+ p->sIn.i++;
453	475	break;
454	476	}
455	477	case '.': {
456		- if( p->zIn[0]=='*' ){
	478	+ if( rePeek(p)=='*' ){
457	479	re_append(p, RE_OP_ANYSTAR, 0);
458		- p->zIn++;
	480	+ p->sIn.i++;
459	481	}else{
460	482	re_append(p, RE_OP_ANY, 0);
461	483	}
462	484	break;
463	485	}
		@@ -479,20 +501,20 @@
479	501	}
480	502	case '{': {
481	503	int m = 0, n = 0;
482	504	int sz, j;
483	505	if( iPrev<0 ) return "'{m,n}' without operand";
484		- while( (c=p->zIn[0])>='0' && c<='9' ){ m = m*10 + c - '0'; p->zIn++; }
	506	+ while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; }
485	507	n = m;
486	508	if( c==',' ){
487		- p->zIn++;
	509	+ p->sIn.i++;
488	510	n = 0;
489		- while( (c=p->zIn[0])>='0' && c<='9' ){ n = n*10 + c - '0'; p->zIn++; }
	511	+ while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; }
490	512	}
491	513	if( c!='}' ) return "unmatched '{'";
492	514	if( n>0 && n<m ) return "n less than m in '{m,n}'";
493		- p->zIn++;
	515	+ p->sIn.i++;
494	516	sz = p->nState - iPrev;
495	517	if( m==0 ){
496	518	if( n==0 ) return "both m and n are zero in '{m,n}'";
497	519	re_insert(p, iPrev, RE_OP_FORK, sz+1);
498	520	n--;
		@@ -508,49 +530,49 @@
508	530	}
509	531	break;
510	532	}
511	533	case '[': {
512	534	int iFirst = p->nState;
513		- if( p->zIn[0]=='^' ){
	535	+ if( rePeek(p)=='^' ){
514	536	re_append(p, RE_OP_CC_EXC, 0);
515		- p->zIn++;
	537	+ p->sIn.i++;
516	538	}else{
517	539	re_append(p, RE_OP_CC_INC, 0);
518	540	}
519		- while( (c = p->xNextChar(&p->zIn))!=0 ){
520		- if( c=='[' && p->zIn[0]==':' ){
	541	+ while( (c = p->xNextChar(&p->sIn))!=0 ){
	542	+ if( c=='[' && rePeek(p)==':' ){
521	543	return "POSIX character classes not supported";
522	544	}
523	545	if( c=='\\' ) c = re_esc_char(p);
524		- if( p->zIn[0]=='-' && p->zIn[1] ){
	546	+ if( rePeek(p)=='-' ){
525	547	re_append(p, RE_OP_CC_RANGE, c);
526		- p->zIn++;
527		- c = p->xNextChar(&p->zIn);
	548	+ p->sIn.i++;
	549	+ c = p->xNextChar(&p->sIn);
528	550	if( c=='\\' ) c = re_esc_char(p);
529	551	re_append(p, RE_OP_CC_RANGE, c);
530	552	}else{
531	553	re_append(p, RE_OP_CC_VALUE, c);
532	554	}
533		- if( p->zIn[0]==']' ){ p->zIn++; break; }
	555	+ if( rePeek(p)==']' ){ p->sIn.i++; break; }
534	556	}
535	557	if( c==0 ) return "unclosed '['";
536	558	p->aArg[iFirst] = p->nState - iFirst;
537	559	break;
538	560	}
539	561	case '\\': {
540	562	int specialOp = 0;
541		- switch( p->zIn[0] ){
	563	+ switch( rePeek(p) ){
542	564	case 'b': specialOp = RE_OP_BOUNDARY; break;
543	565	case 'd': specialOp = RE_OP_DIGIT; break;
544	566	case 'D': specialOp = RE_OP_NOTDIGIT; break;
545	567	case 's': specialOp = RE_OP_SPACE; break;
546	568	case 'S': specialOp = RE_OP_NOTSPACE; break;
547	569	case 'w': specialOp = RE_OP_WORD; break;
548	570	case 'W': specialOp = RE_OP_NOTWORD; break;
549	571	}
550	572	if( specialOp ){
551		- p->zIn++;
	573	+ p->sIn.i++;
552	574	re_append(p, specialOp, 0);
553	575	}else{
554	576	c = re_esc_char(p);
555	577	re_append(p, RE_OP_MATCH, c);
556	578	}
		@@ -602,21 +624,23 @@
602	624	if( zIn[0]=='^' ){
603	625	zIn++;
604	626	}else{
605	627	re_append(pRe, RE_OP_ANYSTAR, 0);
606	628	}
607		- pRe->zIn = (unsigned char*)zIn;
	629	+ pRe->sIn.z = (unsigned char*)zIn;
	630	+ pRe->sIn.i = 0;
	631	+ pRe->sIn.mx = strlen(pRe->sIn.z);
608	632	zErr = re_subcompile_re(pRe);
609	633	if( zErr ){
610	634	re_free(pRe);
611	635	return zErr;
612	636	}
613		- if( pRe->zIn[0]=='$' && pRe->zIn[1]==0 ){
	637	+ if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
614	638	re_append(pRe, RE_OP_MATCH, RE_EOF);
615	639	re_append(pRe, RE_OP_ACCEPT, 0);
616	640	*ppRe = pRe;
617		- }else if( pRe->zIn[0]==0 ){
	641	+ }else if( pRe->sIn.i>=pRe->sIn.mx ){
618	642	re_append(pRe, RE_OP_ACCEPT, 0);
619	643	*ppRe = pRe;
620	644	}else{
621	645	re_free(pRe);
622	646	return "unrecognized character";
		@@ -676,11 +700,11 @@
676	700	}
677	701	sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
678	702	}
679	703	zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
680	704	if( zStr!=0 ){
681		- sqlite3_result_int(context, re_exec(pRe, zStr));
	705	+ sqlite3_result_int(context, re_exec(pRe, zStr, -1));
682	706	}
683	707	}
684	708
685	709	/*
686	710	** Invoke this routine in order to install the REGEXP function in an
		@@ -707,13 +731,12 @@
707	731	char zLine[2000];
708	732	while( fgets(zLine, sizeof(zLine), in) ){
709	733	ln++;
710	734	n = (int)strlen(zLine);
711	735	while( n && (zLine[n-1]=='\n' \|\| zLine[n-1]=='\r') ) n--;
712		- zLine[n] = 0;
713		- if( re_exec(pRe, (const unsigned char*)zLine) ){
714		- printf("%s:%d:%s\n", zFile, ln, zLine);
	736	+ if( re_exec(pRe, (const unsigned char*)zLine, n) ){
	737	+ printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
715	738	}
716	739	}
717	740	}
718	741
719	742	/*
720	743

	--- src/regexp.c
	+++ src/regexp.c
	@@ -94,19 +94,27 @@
94	unsigned nState; /* Number of current states */
95	ReStateNumber *aState; /* Current states */
96	} ReStateSet;
97
98	#if INTERFACE








99	/* A compiled NFA (or an NFA that is in the process of being compiled) is
100	** an instance of the following object.
101	*/
102	struct ReCompiled {
103	const unsigned char *zIn; /* Regular expression text */
104	const char *zErr; /* Error message to return */
105	char *aOp; /* Operators for the virtual machine */
106	int *aArg; /* Arguments to each operator */
107	unsigned (*xNextChar)(const unsigned char**); /* Next character function */
108	char zInit[12]; /* Initial text to match */
109	int nInit; /* Number of characters in zInit */
110	unsigned nState; /* Number of entries in aOp[] and aArg[] */
111	unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
112	};
	@@ -122,37 +130,37 @@
122	/* Extract the next unicode character from *pzIn and return it. Advance
123	** *pzIn to the first byte past the end of the character returned. To
124	** be clear: this routine converts utf8 to unicode. This routine is
125	** optimized for the common case where the next character is a single byte.
126	*/
127	static unsigned re_next_char(const unsigned char **pzIn){
128	unsigned c = **pzIn;
129	if( c>0 ) (*pzIn)++;

130	if( c>0x80 ){
131	if( (c&0xe0)==0xc0 && ((*pzIn)[0]&0xc0)==0x80 ){
132	c = (c&0x1f)<<6 \| ((*pzIn)[0]&0x3f);
133	(*pzIn)++;
134	if( c<0x80 ) c = 0xfffd;
135	}else if( (c&0xf0)==0xe0 && ((*pzIn)[0]&0xc0)==0x80
136	&& ((*pzIn)[1]&0xc0)==0x80 ){
137	c = (c&0x0f)<<12 | (((*pzIn)[0]&0x3f)<<6) | ((*pzIn)[1]&0x3f);
138	*pzIn += 2;
139	if( c<0x3ff \|\| (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
140	}else if( (c&0xf8)==0xf0 && ((*pzIn)[0]&0xc0)==0x80
141	&& ((*pzIn)[1]&0xc0)==0x80 && ((*pzIn)[2]&0xc0)==0x80 ){
142	c = (c&0x07)<<18 | (((*pzIn)[0]&0x3f)<<12) | (((*pzIn)[1]&0x3f)<<6)
143	| ((*pzIn)[2]&0x3f);
144	*pzIn += 3;
145	if( c<0xffff ) c = 0xfffd;
146	}else{
147	c = 0xfffd;
148	}
149	}
150	return c;
151	}
152	static unsigned re_next_char_nocase(const unsigned char **pzIn){
153	unsigned c = re_next_char(pzIn);
154	return unicode_fold(c,1);
155	}
156
157	/* Return true if c is a perl "word" character: [A-Za-z0-9_] */
158	static int re_word_char(int c){
	@@ -170,26 +178,32 @@
170	}
171
172	/* Run a compiled regular expression on the zero-terminated input
173	** string zIn[]. Return true on a match and false if there is no match.
174	*/
175	int re_exec(ReCompiled *pRe, const unsigned char *zIn){
176	ReStateSet aStateSet[2], *pThis, *pNext;
177	ReStateNumber aSpace[100];
178	ReStateNumber *pToFree;
179	unsigned int i = 0;
180	unsigned int iSwap = 0;
181	int c = RE_EOF+1;
182	int cPrev = 0;
183	int rc = 0;
184




185	if( pRe->nInit ){
186	unsigned char x = pRe->zInit[0];
187	while( zIn[0] && (zIn[0]!=x \|\| memcmp(zIn, pRe->zInit, pRe->nInit)!=0) ){
188	zIn++;


189	}
190	if( zIn[0]==0 ) return 0;
191	}
192	if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
193	pToFree = 0;
194	aStateSet[0].aState = aSpace;
195	}else{
	@@ -201,11 +215,11 @@
201	pNext = &aStateSet[1];
202	pNext->nState = 0;
203	re_add_state(pNext, 0);
204	while( c!=RE_EOF && pNext->nState>0 ){
205	cPrev = c;
206	c = pRe->xNextChar(&zIn);
207	pThis = pNext;
208	pNext = &aStateSet[iSwap];
209	iSwap = 1 - iSwap;
210	pNext->nState = 0;
211	for(i=0; i<pThis->nState; i++){
	@@ -370,42 +384,50 @@
370	*/
371	static unsigned re_esc_char(ReCompiled *p){
372	static const char zEsc[] = "afnrtv\\()*.+?[$^{\|}]";
373	static const char zTrans[] = "\a\f\n\r\t\v";
374	int i, v = 0;
375	char c = p->zIn[0];
376	if( c=='u' ){


377	v = 0;
378	if( re_hex(p->zIn[1],&v)
379	&& re_hex(p->zIn[2],&v)
380	&& re_hex(p->zIn[3],&v)
381	&& re_hex(p->zIn[4],&v)

382	){
383	p->zIn += 5;
384	return v;
385	}
386	}
387	if( c=='x' ){
388	v = 0;
389	for(i=1; re_hex(p->zIn[i], &v); i++){}
390	if( i>1 ){
391	p->zIn += i;
392	return v;
393	}
394	}
395	for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
396	if( zEsc[i] ){
397	if( i<6 ) c = zTrans[i];
398	p->zIn++;
399	}else{
400	p->zErr = "unknown \\ escape";
401	}
402	return c;
403	}
404
405	/* Forward declaration */
406	static const char *re_subcompile_string(ReCompiled*);





407
408	/* Compile RE text into a sequence of opcodes. Continue up to the
409	** first unmatched ")" character, then return. If an error is found,
410	** return a pointer to the error message string.
411	*/
	@@ -413,15 +435,15 @@
413	const char *zErr;
414	int iStart, iEnd, iGoto;
415	iStart = p->nState;
416	zErr = re_subcompile_string(p);
417	if( zErr ) return zErr;
418	while( p->zIn[0]=='\|' ){
419	iEnd = p->nState;
420	re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
421	iGoto = re_append(p, RE_OP_GOTO, 0);
422	p->zIn++;
423	zErr = re_subcompile_string(p);
424	if( zErr ) return zErr;
425	p->aArg[iGoto] = p->nState - iGoto;
426	}
427	return 0;
	@@ -434,30 +456,30 @@
434	static const char *re_subcompile_string(ReCompiled *p){
435	int iPrev = -1;
436	int iStart;
437	unsigned c;
438	const char *zErr;
439	while( (c = p->xNextChar(&p->zIn))!=0 ){
440	iStart = p->nState;
441	switch( c ){
442	case '\|':
443	case '$':
444	case ')': {
445	p->zIn--;
446	return 0;
447	}
448	case '(': {
449	zErr = re_subcompile_re(p);
450	if( zErr ) return zErr;
451	if( p->zIn[0]!=')' ) return "unmatched '('";
452	p->zIn++;
453	break;
454	}
455	case '.': {
456	if( p->zIn[0]=='*' ){
457	re_append(p, RE_OP_ANYSTAR, 0);
458	p->zIn++;
459	}else{
460	re_append(p, RE_OP_ANY, 0);
461	}
462	break;
463	}
	@@ -479,20 +501,20 @@
479	}
480	case '{': {
481	int m = 0, n = 0;
482	int sz, j;
483	if( iPrev<0 ) return "'{m,n}' without operand";
484	while( (c=p->zIn[0])>='0' && c<='9' ){ m = m*10 + c - '0'; p->zIn++; }
485	n = m;
486	if( c==',' ){
487	p->zIn++;
488	n = 0;
489	while( (c=p->zIn[0])>='0' && c<='9' ){ n = n*10 + c - '0'; p->zIn++; }
490	}
491	if( c!='}' ) return "unmatched '{'";
492	if( n>0 && n<m ) return "n less than m in '{m,n}'";
493	p->zIn++;
494	sz = p->nState - iPrev;
495	if( m==0 ){
496	if( n==0 ) return "both m and n are zero in '{m,n}'";
497	re_insert(p, iPrev, RE_OP_FORK, sz+1);
498	n--;
	@@ -508,49 +530,49 @@
508	}
509	break;
510	}
511	case '[': {
512	int iFirst = p->nState;
513	if( p->zIn[0]=='^' ){
514	re_append(p, RE_OP_CC_EXC, 0);
515	p->zIn++;
516	}else{
517	re_append(p, RE_OP_CC_INC, 0);
518	}
519	while( (c = p->xNextChar(&p->zIn))!=0 ){
520	if( c=='[' && p->zIn[0]==':' ){
521	return "POSIX character classes not supported";
522	}
523	if( c=='\\' ) c = re_esc_char(p);
524	if( p->zIn[0]=='-' && p->zIn[1] ){
525	re_append(p, RE_OP_CC_RANGE, c);
526	p->zIn++;
527	c = p->xNextChar(&p->zIn);
528	if( c=='\\' ) c = re_esc_char(p);
529	re_append(p, RE_OP_CC_RANGE, c);
530	}else{
531	re_append(p, RE_OP_CC_VALUE, c);
532	}
533	if( p->zIn[0]==']' ){ p->zIn++; break; }
534	}
535	if( c==0 ) return "unclosed '['";
536	p->aArg[iFirst] = p->nState - iFirst;
537	break;
538	}
539	case '\\': {
540	int specialOp = 0;
541	switch( p->zIn[0] ){
542	case 'b': specialOp = RE_OP_BOUNDARY; break;
543	case 'd': specialOp = RE_OP_DIGIT; break;
544	case 'D': specialOp = RE_OP_NOTDIGIT; break;
545	case 's': specialOp = RE_OP_SPACE; break;
546	case 'S': specialOp = RE_OP_NOTSPACE; break;
547	case 'w': specialOp = RE_OP_WORD; break;
548	case 'W': specialOp = RE_OP_NOTWORD; break;
549	}
550	if( specialOp ){
551	p->zIn++;
552	re_append(p, specialOp, 0);
553	}else{
554	c = re_esc_char(p);
555	re_append(p, RE_OP_MATCH, c);
556	}
	@@ -602,21 +624,23 @@
602	if( zIn[0]=='^' ){
603	zIn++;
604	}else{
605	re_append(pRe, RE_OP_ANYSTAR, 0);
606	}
607	pRe->zIn = (unsigned char*)zIn;


608	zErr = re_subcompile_re(pRe);
609	if( zErr ){
610	re_free(pRe);
611	return zErr;
612	}
613	if( pRe->zIn[0]=='$' && pRe->zIn[1]==0 ){
614	re_append(pRe, RE_OP_MATCH, RE_EOF);
615	re_append(pRe, RE_OP_ACCEPT, 0);
616	*ppRe = pRe;
617	}else if( pRe->zIn[0]==0 ){
618	re_append(pRe, RE_OP_ACCEPT, 0);
619	*ppRe = pRe;
620	}else{
621	re_free(pRe);
622	return "unrecognized character";
	@@ -676,11 +700,11 @@
676	}
677	sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
678	}
679	zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
680	if( zStr!=0 ){
681	sqlite3_result_int(context, re_exec(pRe, zStr));
682	}
683	}
684
685	/*
686	** Invoke this routine in order to install the REGEXP function in an
	@@ -707,13 +731,12 @@
707	char zLine[2000];
708	while( fgets(zLine, sizeof(zLine), in) ){
709	ln++;
710	n = (int)strlen(zLine);
711	while( n && (zLine[n-1]=='\n' \|\| zLine[n-1]=='\r') ) n--;
712	zLine[n] = 0;
713	if( re_exec(pRe, (const unsigned char*)zLine) ){
714	printf("%s:%d:%s\n", zFile, ln, zLine);
715	}
716	}
717	}
718
719	/*
720

	--- src/regexp.c
	+++ src/regexp.c
	@@ -94,19 +94,27 @@
94	unsigned nState; /* Number of current states */
95	ReStateNumber *aState; /* Current states */
96	} ReStateSet;
97
98	#if INTERFACE
99	/* An input string read one character at a time.
100	*/
101	struct ReInput {
102	const unsigned char *z; /* All text */
103	int i; /* Next byte to read */
104	int mx; /* EOF when i>=mx */
105	};
106
107	/* A compiled NFA (or an NFA that is in the process of being compiled) is
108	** an instance of the following object.
109	*/
110	struct ReCompiled {
111	ReInput sIn; /* Regular expression text */
112	const char *zErr; /* Error message to return */
113	char *aOp; /* Operators for the virtual machine */
114	int *aArg; /* Arguments to each operator */
115	unsigned (*xNextChar)(ReInput*); /* Next character function */
116	char zInit[12]; /* Initial text to match */
117	int nInit; /* Number of characters in zInit */
118	unsigned nState; /* Number of entries in aOp[] and aArg[] */
119	unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
120	};
	@@ -122,37 +130,37 @@
130	/* Extract the next unicode character from *pzIn and return it. Advance
131	** *pzIn to the first byte past the end of the character returned. To
132	** be clear: this routine converts utf8 to unicode. This routine is
133	** optimized for the common case where the next character is a single byte.
134	*/
135	static unsigned re_next_char(ReInput *p){
136	unsigned c;
137	if( p->i>=p->mx ) return 0;
138	c = p->z[p->i++];
139	if( c>0x80 ){
140	if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){
141	c = (c&0x1f)<<6 \| (p->z[p->i++]&0x3f);

142	if( c<0x80 ) c = 0xfffd;
143	}else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
144	&& (p->z[p->i+1]&0xc0)==0x80 ){
145	c = (c&0x0f)<<12 \| ((p->z[p->i]&0x3f)<<6) \| (p->z[p->i+1]&0x3f);
146	p->i += 2;
147	if( c<0x3ff \|\| (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
148	}else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
149	&& (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
150	c = (c&0x07)<<18 \| ((p->z[p->i]&0x3f)<<12) \| ((p->z[p->i+1]&0x3f)<<6)
151	\| (p->z[p->i+2]&0x3f);
152	p->i += 3;
153	if( c<0xffff ) c = 0xfffd;
154	}else{
155	c = 0xfffd;
156	}
157	}
158	return c;
159	}
160	static unsigned re_next_char_nocase(ReInput *p){
161	unsigned c = re_next_char(p);
162	return unicode_fold(c,1);
163	}
164
165	/* Return true if c is a perl "word" character: [A-Za-z0-9_] */
166	static int re_word_char(int c){
	@@ -170,26 +178,32 @@
178	}
179
180	/* Run a compiled regular expression on the zero-terminated input
181	** string zIn[]. Return true on a match and false if there is no match.
182	*/
183	int re_exec(ReCompiled *pRe, const unsigned char *zIn, int nIn){
184	ReStateSet aStateSet[2], *pThis, *pNext;
185	ReStateNumber aSpace[100];
186	ReStateNumber *pToFree;
187	unsigned int i = 0;
188	unsigned int iSwap = 0;
189	int c = RE_EOF+1;
190	int cPrev = 0;
191	int rc = 0;
192	ReInput in;
193
194	in.z = zIn;
195	in.i = 0;
196	in.mx = nIn>=0 ? nIn : strlen(zIn);
197	if( pRe->nInit ){
198	unsigned char x = pRe->zInit[0];
199	while( in.i+pRe->nInit<in.mx
200	&& (zIn[in.i]!=x \|\| memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201	){
202	in.i++;
203	}
204	if( in.i+pRe->nInit>=in.mx ) return 0;
205	}
206	if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
207	pToFree = 0;
208	aStateSet[0].aState = aSpace;
209	}else{
	@@ -201,11 +215,11 @@
215	pNext = &aStateSet[1];
216	pNext->nState = 0;
217	re_add_state(pNext, 0);
218	while( c!=RE_EOF && pNext->nState>0 ){
219	cPrev = c;
220	c = pRe->xNextChar(&in);
221	pThis = pNext;
222	pNext = &aStateSet[iSwap];
223	iSwap = 1 - iSwap;
224	pNext->nState = 0;
225	for(i=0; i<pThis->nState; i++){
	@@ -370,42 +384,50 @@
384	*/
385	static unsigned re_esc_char(ReCompiled *p){
386	static const char zEsc[] = "afnrtv\\()*.+?[$^{\|}]";
387	static const char zTrans[] = "\a\f\n\r\t\v";
388	int i, v = 0;
389	char c;
390	if( p->sIn.i>=p->sIn.mx ) return 0;
391	c = p->sIn.z[0];
392	if( c=='u' && p->sIn.i+5<p->sIn.mx ){
393	v = 0;
394	const unsigned char *zIn = p->sIn.z + p->sIn.i;
395	if( re_hex(zIn[1],&v)
396	&& re_hex(zIn[2],&v)
397	&& re_hex(zIn[3],&v)
398	&& re_hex(zIn[4],&v)
399	){
400	p->sIn.i += 5;
401	return v;
402	}
403	}
404	if( c=='x' ){
405	v = 0;
406	for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
407	if( i>1 ){
408	p->sIn.i += i;
409	return v;
410	}
411	}
412	for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
413	if( zEsc[i] ){
414	if( i<6 ) c = zTrans[i];
415	p->sIn.i++;
416	}else{
417	p->zErr = "unknown \\ escape";
418	}
419	return c;
420	}
421
422	/* Forward declaration */
423	static const char *re_subcompile_string(ReCompiled*);
424
425	/* Peek at the next byte of input */
426	static unsigned char rePeek(ReCompiled *p){
427	return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;
428	}
429
430	/* Compile RE text into a sequence of opcodes. Continue up to the
431	** first unmatched ")" character, then return. If an error is found,
432	** return a pointer to the error message string.
433	*/
	@@ -413,15 +435,15 @@
435	const char *zErr;
436	int iStart, iEnd, iGoto;
437	iStart = p->nState;
438	zErr = re_subcompile_string(p);
439	if( zErr ) return zErr;
440	while( rePeek(p)=='\|' ){
441	iEnd = p->nState;
442	re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
443	iGoto = re_append(p, RE_OP_GOTO, 0);
444	p->sIn.i++;
445	zErr = re_subcompile_string(p);
446	if( zErr ) return zErr;
447	p->aArg[iGoto] = p->nState - iGoto;
448	}
449	return 0;
	@@ -434,30 +456,30 @@
456	static const char *re_subcompile_string(ReCompiled *p){
457	int iPrev = -1;
458	int iStart;
459	unsigned c;
460	const char *zErr;
461	while( (c = p->xNextChar(&p->sIn))!=0 ){
462	iStart = p->nState;
463	switch( c ){
464	case '\|':
465	case '$':
466	case ')': {
467	p->sIn.i--;
468	return 0;
469	}
470	case '(': {
471	zErr = re_subcompile_re(p);
472	if( zErr ) return zErr;
473	if( rePeek(p)!=')' ) return "unmatched '('";
474	p->sIn.i++;
475	break;
476	}
477	case '.': {
478	if( rePeek(p)=='*' ){
479	re_append(p, RE_OP_ANYSTAR, 0);
480	p->sIn.i++;
481	}else{
482	re_append(p, RE_OP_ANY, 0);
483	}
484	break;
485	}
	@@ -479,20 +501,20 @@
501	}
502	case '{': {
503	int m = 0, n = 0;
504	int sz, j;
505	if( iPrev<0 ) return "'{m,n}' without operand";
506	while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; }
507	n = m;
508	if( c==',' ){
509	p->sIn.i++;
510	n = 0;
511	while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; }
512	}
513	if( c!='}' ) return "unmatched '{'";
514	if( n>0 && n<m ) return "n less than m in '{m,n}'";
515	p->sIn.i++;
516	sz = p->nState - iPrev;
517	if( m==0 ){
518	if( n==0 ) return "both m and n are zero in '{m,n}'";
519	re_insert(p, iPrev, RE_OP_FORK, sz+1);
520	n--;
	@@ -508,49 +530,49 @@
530	}
531	break;
532	}
533	case '[': {
534	int iFirst = p->nState;
535	if( rePeek(p)=='^' ){
536	re_append(p, RE_OP_CC_EXC, 0);
537	p->sIn.i++;
538	}else{
539	re_append(p, RE_OP_CC_INC, 0);
540	}
541	while( (c = p->xNextChar(&p->sIn))!=0 ){
542	if( c=='[' && rePeek(p)==':' ){
543	return "POSIX character classes not supported";
544	}
545	if( c=='\\' ) c = re_esc_char(p);
546	if( rePeek(p)=='-' ){
547	re_append(p, RE_OP_CC_RANGE, c);
548	p->sIn.i++;
549	c = p->xNextChar(&p->sIn);
550	if( c=='\\' ) c = re_esc_char(p);
551	re_append(p, RE_OP_CC_RANGE, c);
552	}else{
553	re_append(p, RE_OP_CC_VALUE, c);
554	}
555	if( rePeek(p)==']' ){ p->sIn.i++; break; }
556	}
557	if( c==0 ) return "unclosed '['";
558	p->aArg[iFirst] = p->nState - iFirst;
559	break;
560	}
561	case '\\': {
562	int specialOp = 0;
563	switch( rePeek(p) ){
564	case 'b': specialOp = RE_OP_BOUNDARY; break;
565	case 'd': specialOp = RE_OP_DIGIT; break;
566	case 'D': specialOp = RE_OP_NOTDIGIT; break;
567	case 's': specialOp = RE_OP_SPACE; break;
568	case 'S': specialOp = RE_OP_NOTSPACE; break;
569	case 'w': specialOp = RE_OP_WORD; break;
570	case 'W': specialOp = RE_OP_NOTWORD; break;
571	}
572	if( specialOp ){
573	p->sIn.i++;
574	re_append(p, specialOp, 0);
575	}else{
576	c = re_esc_char(p);
577	re_append(p, RE_OP_MATCH, c);
578	}
	@@ -602,21 +624,23 @@
624	if( zIn[0]=='^' ){
625	zIn++;
626	}else{
627	re_append(pRe, RE_OP_ANYSTAR, 0);
628	}
629	pRe->sIn.z = (unsigned char*)zIn;
630	pRe->sIn.i = 0;
631	pRe->sIn.mx = strlen(pRe->sIn.z);
632	zErr = re_subcompile_re(pRe);
633	if( zErr ){
634	re_free(pRe);
635	return zErr;
636	}
637	if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
638	re_append(pRe, RE_OP_MATCH, RE_EOF);
639	re_append(pRe, RE_OP_ACCEPT, 0);
640	*ppRe = pRe;
641	}else if( pRe->sIn.i>=pRe->sIn.mx ){
642	re_append(pRe, RE_OP_ACCEPT, 0);
643	*ppRe = pRe;
644	}else{
645	re_free(pRe);
646	return "unrecognized character";
	@@ -676,11 +700,11 @@
700	}
701	sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
702	}
703	zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
704	if( zStr!=0 ){
705	sqlite3_result_int(context, re_exec(pRe, zStr, -1));
706	}
707	}
708
709	/*
710	** Invoke this routine in order to install the REGEXP function in an
	@@ -707,13 +731,12 @@
731	char zLine[2000];
732	while( fgets(zLine, sizeof(zLine), in) ){
733	ln++;
734	n = (int)strlen(zLine);
735	while( n && (zLine[n-1]=='\n' \|\| zLine[n-1]=='\r') ) n--;
736	if( re_exec(pRe, (const unsigned char*)zLine, n) ){
737	printf("%s:%d:%.*s\n", zFile, ln, n, zLine);

738	}
739	}
740	}
741
742	/*
743

M src/wiki.c

+1 -1

		--- src/wiki.c
		+++ src/wiki.c
		@@ -667,11 +667,11 @@
667	667	if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668	668	blob_init(&w2, pW2->zWiki, -1);
669	669	}
670	670	blob_zero(&d);
671	671	diffFlags = construct_diff_flags(1,0);
672		- text_diff(&w2, &w1, &d, diffFlags \| DIFF_HTML \| DIFF_LINENO);
	672	+ text_diff(&w2, &w1, &d, 0, diffFlags \| DIFF_HTML \| DIFF_LINENO);
673	673	@ <div class="udiff">
674	674	@ %s(blob_str(&d))
675	675	@ </div>
676	676	manifest_destroy(pW1);
677	677	manifest_destroy(pW2);
678	678

	--- src/wiki.c
	+++ src/wiki.c
	@@ -667,11 +667,11 @@
667	if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668	blob_init(&w2, pW2->zWiki, -1);
669	}
670	blob_zero(&d);
671	diffFlags = construct_diff_flags(1,0);
672	text_diff(&w2, &w1, &d, diffFlags \| DIFF_HTML \| DIFF_LINENO);
673	@ <div class="udiff">
674	@ %s(blob_str(&d))
675	@ </div>
676	manifest_destroy(pW1);
677	manifest_destroy(pW2);
678

	--- src/wiki.c
	+++ src/wiki.c
	@@ -667,11 +667,11 @@
667	if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668	blob_init(&w2, pW2->zWiki, -1);
669	}
670	blob_zero(&d);
671	diffFlags = construct_diff_flags(1,0);
672	text_diff(&w2, &w1, &d, 0, diffFlags \| DIFF_HTML \| DIFF_LINENO);
673	@ <div class="udiff">
674	@ %s(blob_str(&d))
675	@ </div>
676	manifest_destroy(pW1);
677	manifest_destroy(pW2);
678

Fossil SCM

Keyboard Shortcuts