Fossil SCM

Have the security-audit page analyze and display the content security policy.

drh 2019-08-19 17:18 trunk

Commit 9cf90a4f9d134b7fddb190e27c33c8d5d6f25fd25d37a2241e3fae02c715b5e9

Parent 7979989dff34a27…

3 files changed +7 +80 +107 -8

~ src/cgi.c ~ src/security_audit.c ~ src/wikiformat.c

M src/cgi.c

		--- src/cgi.c
		+++ src/cgi.c
		@@ -144,10 +144,17 @@
144	144	** Return a pointer to the CGI output blob.
145	145	*/
146	146	Blob *cgi_output_blob(void){
147	147	return pContent;
148	148	}
	149	+
	150	+/*
	151	+** Return complete text of the output header
	152	+*/
	153	+const char *cgi_header(void){
	154	+ return blob_str(&cgiContent[0]);
	155	+}
149	156
150	157	/*
151	158	** Combine the header and body of the CGI into a single string.
152	159	*/
153	160	static void cgi_combine_header_and_body(void){
154	161

	--- src/cgi.c
	+++ src/cgi.c
	@@ -144,10 +144,17 @@
144	** Return a pointer to the CGI output blob.
145	*/
146	Blob *cgi_output_blob(void){
147	return pContent;
148	}







149
150	/*
151	** Combine the header and body of the CGI into a single string.
152	*/
153	static void cgi_combine_header_and_body(void){
154

	--- src/cgi.c
	+++ src/cgi.c
	@@ -144,10 +144,17 @@
144	** Return a pointer to the CGI output blob.
145	*/
146	Blob *cgi_output_blob(void){
147	return pContent;
148	}
149
150	/*
151	** Return complete text of the output header
152	*/
153	const char *cgi_header(void){
154	return blob_str(&cgiContent[0]);
155	}
156
157	/*
158	** Combine the header and body of the CGI into a single string.
159	*/
160	static void cgi_combine_header_and_body(void){
161

M src/security_audit.c

+80

		--- src/security_audit.c
		+++ src/security_audit.c
		@@ -32,10 +32,69 @@
32	32	zTest++;
33	33	}
34	34	return 0;
35	35	}
36	36
	37	+/*
	38	+** Extract the content-security-policy from the reply header. Parse it
	39	+** up into separate fields, and return a pointer to a null-terminated
	40	+** array of pointers to strings, one entry for each field. Or return
	41	+** a NULL pointer if no CSP could be located in the header.
	42	+**
	43	+** Memory to hold the returned array and of the strings is obtained from
	44	+** a single memory allocation, which the caller should free to avoid a
	45	+** memory leak.
	46	+*/
	47	+static char **parse_content_security_policy(void){
	48	+ char **azCSP = 0;
	49	+ int nCSP = 0;
	50	+ const char *zHeader;
	51	+ const char *zAll;
	52	+ char *zCopy;
	53	+ int nAll = 0;
	54	+ int ii, jj, n, nx = 0;
	55	+ int nSemi;
	56	+
	57	+ zHeader = cgi_header();
	58	+ if( zHeader==0 ) return 0;
	59	+ for(ii=0; zHeader[ii]; ii+=n){
	60	+ n = html_token_length(zHeader+ii);
	61	+ if( zHeader[ii]=='<'
	62	+ && fossil_strnicmp(html_attribute(zHeader+ii,"http-equiv",&nx),
	63	+ "Content-Security-Policy",23)==0
	64	+ && nx==23
	65	+ && (zAll = html_attribute(zHeader+ii,"content",&nAll))!=0
	66	+ ){
	67	+ for(jj=nSemi=0; jj<nAll; jj++){ if( zAll[jj]==';' ) nSemi++; }
	68	+ azCSP = fossil_malloc( nAll+1 + (nSemi+2)*sizeof(char*) );
	69	+ zCopy = &azCSP[nSemi+2];
	70	+ memcpy(zCopy,zAll,nAll);
	71	+ zCopy[nAll] = 0;
	72	+ while( fossil_isspace(zCopy[0]) || zCopy[0]==';' ){ zCopy++; }
	73	+ azCSP[0] = zCopy;
	74	+ nCSP = 1;
	75	+ for(jj=0; zCopy[jj]; jj++){
	76	+ if( zCopy[jj]==';' ){
	77	+ int k;
	78	+ for(k=jj-1; k>0 && fossil_isspace(zCopy[k]); k--){ zCopy[k] = 0; }
	79	+ zCopy[jj] = 0;
	80	+ while( jj+1<nAll
	81	+ && (fossil_isspace(zCopy[jj+1]) || zCopy[jj+1]==';')
	82	+ ){
	83	+ jj++;
	84	+ }
	85	+ assert( nCSP<nSemi+1 );
	86	+ azCSP[nCSP++] = zCopy+jj;
	87	+ }
	88	+ }
	89	+ assert( nCSP<=nSemi+2 );
	90	+ azCSP[nCSP] = 0;
	91	+ return azCSP;
	92	+ }
	93	+ }
	94	+ return 0;
	95	+}
37	96
38	97	/*
39	98	** WEBPAGE: secaudit0
40	99	**
41	100	** Run a security audit of the current Fossil setup, looking
		@@ -50,10 +109,11 @@
50	109	const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
51	110	const char *zPubPages; /* GLOB pattern for public pages */
52	111	const char *zSelfCap; /* Capabilities of self-registered users */
53	112	char *z;
54	113	int n;
	114	+ char **azCSP; /* Parsed content security policy */
55	115
56	116	login_check_credentials();
57	117	if( !g.perm.Admin ){
58	118	login_needed(0);
59	119	return;
		@@ -439,10 +499,30 @@
439	499	}
440	500
441	501	@ <li><p> User capability summary:
442	502	capability_summary();
443	503
	504	+
	505	+ azCSP = parse_content_security_policy();
	506	+ if( azCSP==0 ){
	507	+ @ <li><p> WARNING: No Content Security Policy (CSP) is specified in the
	508	+ @ header. Though not required, a strong CSP is recommended. Fossil will
	509	+ @ automatically insert an appropriate CSP if you let it generate the
	510	+ @ HTML <tt>&lt;head&gt;</tt> element by omitting <tt>&lt;body&gt;</tt>
	511	+ @ from the header configuration in your customized skin.
	512	+ @
	513	+ }else{
	514	+ int ii;
	515	+ @ <li><p> Content Security Policy:
	516	+ @ <ol type="a">
	517	+ for(ii=0; azCSP[ii]; ii++){
	518	+ @ <li>%h(azCSP[ii])
	519	+ }
	520	+ @ </ol>
	521	+ }
	522	+ fossil_free(azCSP);
	523	+
444	524	if( alert_enabled() ){
445	525	@ <li><p> Email alert configuration summary:
446	526	@ <table class="label-value">
447	527	stats_for_email();
448	528	@ </table>
449	529

	--- src/security_audit.c
	+++ src/security_audit.c
	@@ -32,10 +32,69 @@
32	zTest++;
33	}
34	return 0;
35	}
36



























































37
38	/*
39	** WEBPAGE: secaudit0
40	**
41	** Run a security audit of the current Fossil setup, looking
	@@ -50,10 +109,11 @@
50	const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
51	const char *zPubPages; /* GLOB pattern for public pages */
52	const char *zSelfCap; /* Capabilities of self-registered users */
53	char *z;
54	int n;

55
56	login_check_credentials();
57	if( !g.perm.Admin ){
58	login_needed(0);
59	return;
	@@ -439,10 +499,30 @@
439	}
440
441	@ <li><p> User capability summary:
442	capability_summary();
443




















444	if( alert_enabled() ){
445	@ <li><p> Email alert configuration summary:
446	@ <table class="label-value">
447	stats_for_email();
448	@ </table>
449

	--- src/security_audit.c
	+++ src/security_audit.c
	@@ -32,10 +32,69 @@
32	zTest++;
33	}
34	return 0;
35	}
36
37	/*
38	** Extract the content-security-policy from the reply header. Parse it
39	** up into separate fields, and return a pointer to a null-terminated
40	** array of pointers to strings, one entry for each field. Or return
41	** a NULL pointer if no CSP could be located in the header.
42	**
43	** Memory to hold the returned array and of the strings is obtained from
44	** a single memory allocation, which the caller should free to avoid a
45	** memory leak.
46	*/
47	static char **parse_content_security_policy(void){
48	char **azCSP = 0;
49	int nCSP = 0;
50	const char *zHeader;
51	const char *zAll;
52	char *zCopy;
53	int nAll = 0;
54	int ii, jj, n, nx = 0;
55	int nSemi;
56
57	zHeader = cgi_header();
58	if( zHeader==0 ) return 0;
59	for(ii=0; zHeader[ii]; ii+=n){
60	n = html_token_length(zHeader+ii);
61	if( zHeader[ii]=='<'
62	&& fossil_strnicmp(html_attribute(zHeader+ii,"http-equiv",&nx),
63	"Content-Security-Policy",23)==0
64	&& nx==23
65	&& (zAll = html_attribute(zHeader+ii,"content",&nAll))!=0
66	){
67	for(jj=nSemi=0; jj<nAll; jj++){ if( zAll[jj]==';' ) nSemi++; }
68	azCSP = fossil_malloc( nAll+1 + (nSemi+2)*sizeof(char*) );
69	zCopy = &azCSP[nSemi+2];
70	memcpy(zCopy,zAll,nAll);
71	zCopy[nAll] = 0;
72	while( fossil_isspace(zCopy[0]) || zCopy[0]==';' ){ zCopy++; }
73	azCSP[0] = zCopy;
74	nCSP = 1;
75	for(jj=0; zCopy[jj]; jj++){
76	if( zCopy[jj]==';' ){
77	int k;
78	for(k=jj-1; k>0 && fossil_isspace(zCopy[k]); k--){ zCopy[k] = 0; }
79	zCopy[jj] = 0;
80	while( jj+1<nAll
81	&& (fossil_isspace(zCopy[jj+1]) || zCopy[jj+1]==';')
82	){
83	jj++;
84	}
85	assert( nCSP<nSemi+1 );
86	azCSP[nCSP++] = zCopy+jj;
87	}
88	}
89	assert( nCSP<=nSemi+2 );
90	azCSP[nCSP] = 0;
91	return azCSP;
92	}
93	}
94	return 0;
95	}
96
97	/*
98	** WEBPAGE: secaudit0
99	**
100	** Run a security audit of the current Fossil setup, looking
	@@ -50,10 +109,11 @@
109	const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
110	const char *zPubPages; /* GLOB pattern for public pages */
111	const char *zSelfCap; /* Capabilities of self-registered users */
112	char *z;
113	int n;
114	char **azCSP; /* Parsed content security policy */
115
116	login_check_credentials();
117	if( !g.perm.Admin ){
118	login_needed(0);
119	return;
	@@ -439,10 +499,30 @@
499	}
500
501	@ <li><p> User capability summary:
502	capability_summary();
503
504
505	azCSP = parse_content_security_policy();
506	if( azCSP==0 ){
507	@ <li><p> WARNING: No Content Security Policy (CSP) is specified in the
508	@ header. Though not required, a strong CSP is recommended. Fossil will
509	@ automatically insert an appropriate CSP if you let it generate the
510	@ HTML <tt>&lt;head&gt;</tt> element by omitting <tt>&lt;body&gt;</tt>
511	@ from the header configuration in your customized skin.
512	@
513	}else{
514	int ii;
515	@ <li><p> Content Security Policy:
516	@ <ol type="a">
517	for(ii=0; azCSP[ii]; ii++){
518	@ <li>%h(azCSP[ii])
519	}
520	@ </ol>
521	}
522	fossil_free(azCSP);
523
524	if( alert_enabled() ){
525	@ <li><p> Email alert configuration summary:
526	@ <table class="label-value">
527	stats_for_email();
528	@ </table>
529

M src/wikiformat.c

+107 -8

		--- src/wikiformat.c
		+++ src/wikiformat.c
		@@ -2014,16 +2014,13 @@
2014	2014	}
2015	2015	free(renderer.aStack);
2016	2016	}
2017	2017
2018	2018	/*
2019		-** Get the next HTML token.
2020		-**
2021		-** z points to the start of a token. Return the number of
2022		-** characters in that token.
	2019	+** Return the length, in bytes, of the HTML token that z is pointing to.
2023	2020	*/
2024		-static int nextHtmlToken(const char *z){
	2021	+int html_token_length(const char *z){
2025	2022	int n;
2026	2023	char c;
2027	2024	if( (c=z[0])=='<' ){
2028	2025	n = htmlTagLength(z);
2029	2026	if( n<=0 ) n = 1;
		@@ -2040,10 +2037,112 @@
2040	2037	if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2041	2038	}
2042	2039	}
2043	2040	return n;
2044	2041	}
	2042	+
	2043	+/*
	2044	+** z points to someplace in the middle of HTML markup. Return the length
	2045	+** of the subtoken that starts on z.
	2046	+*/
	2047	+int html_subtoken_length(const char *z){
	2048	+ int n;
	2049	+ char c;
	2050	+ c = z[0];
	2051	+ if( fossil_isspace(c) ){
	2052	+ for(n=1; z[n] && fossil_isspace(z[n]); n++){}
	2053	+ return n;
	2054	+ }
	2055	+ if( c=='"' || c=='\'' ){
	2056	+ for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
	2057	+ if( z[n]==c ) n++;
	2058	+ return n;
	2059	+ }
	2060	+ if( c=='>' ){
	2061	+ return 0;
	2062	+ }
	2063	+ if( c=='=' ){
	2064	+ return 1;
	2065	+ }
	2066	+ if( fossil_isalnum(c) || c=='/' ){
	2067	+ for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
	2068	+ return n;
	2069	+ }
	2070	+ return 1;
	2071	+}
	2072	+
	2073	+/*
	2074	+** z points to an HTML markup token: <TAG ATTR=VALUE ...>
	2075	+** This routine looks for the VALUE associated with zAttr and returns
	2076	+** a pointer to the start of that value and sets *pLen to be the length
	2077	+** in bytes for the value. Or it returns NULL if no such attr exists.
	2078	+*/
	2079	+const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
	2080	+ int i = 1;
	2081	+ int n;
	2082	+ int nAttr;
	2083	+ int iMatchCnt = 0;
	2084	+ assert( zMarkup[0]=='<' );
	2085	+ assert( zMarkup[1]!=0 );
	2086	+ n = html_subtoken_length(zMarkup+i);
	2087	+ if( n==0 ) return 0;
	2088	+ i += n;
	2089	+ nAttr = (int)strlen(zAttr);
	2090	+ while( 1 ){
	2091	+ const char *zStart = zMarkup+i;
	2092	+ n = html_subtoken_length(zStart);
	2093	+ if( n==0 ) break;
	2094	+ i += n;
	2095	+ if( fossil_isspace(zStart[0]) ) continue;
	2096	+ if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
	2097	+ iMatchCnt = 1;
	2098	+ }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
	2099	+ iMatchCnt = 2;
	2100	+ }else if( iMatchCnt==2 ){
	2101	+ if( (zStart[0]=='"' || zStart[0]=='\'') && zStart[n-1]==zStart[0] ){
	2102	+ zStart++;
	2103	+ n -= 2;
	2104	+ }
	2105	+ *pLen = n;
	2106	+ return zStart;
	2107	+ }else{
	2108	+ iMatchCnt = 0;
	2109	+ }
	2110	+ }
	2111	+ return 0;
	2112	+}
	2113	+
	2114	+/*
	2115	+** COMMAND: test-html-tokenize
	2116	+**
	2117	+** Tokenize an HTML file. Return the offset and length and text of
	2118	+** each token - one token per line. Omit white-space tokens.
	2119	+*/
	2120	+void test_html_tokenize(void){
	2121	+ Blob in;
	2122	+ char *z;
	2123	+ int i;
	2124	+ int iOfst, n;
	2125	+
	2126	+ for(i=2; i<g.argc; i++){
	2127	+ blob_read_from_file(&in, g.argv[i], ExtFILE);
	2128	+ z = blob_str(&in);
	2129	+ for(iOfst=0; z[iOfst]; iOfst+=n){
	2130	+ n = html_token_length(z+iOfst);
	2131	+ if( fossil_isspace(z[iOfst]) ) continue;
	2132	+ fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
	2133	+ if( z[iOfst]=='<' && n>1 ){
	2134	+ int j,k;
	2135	+ for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
	2136	+ if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
	2137	+ fossil_print("# %d %d %.*s\n", j, k, k, z+j);
	2138	+ }
	2139	+ }
	2140	+ }
	2141	+ blob_reset(&in);
	2142	+ }
	2143	+}
2045	2144
2046	2145	/*
2047	2146	** Attempt to reformat messy HTML to be easily readable by humans.
2048	2147	**
2049	2148	** * Try to keep lines less than 80 characters in length
		@@ -2062,11 +2161,11 @@
2062	2161	int nPre = 0;
2063	2162	int iCur = 0;
2064	2163	int wantSpace = 0;
2065	2164	int omitSpace = 1;
2066	2165	while( zIn[0] ){
2067		- n = nextHtmlToken(zIn);
	2166	+ n = html_token_length(zIn);
2068	2167	if( zIn[0]=='<' && n>1 ){
2069	2168	int i, j;
2070	2169	int isCloseTag;
2071	2170	int eTag;
2072	2171	int eType;
		@@ -2181,11 +2280,11 @@
2181	2280	int seenText = 0; /* True after first non-whitespace seen */
2182	2281	int nNL = 0; /* Number of \n characters at the end of pOut */
2183	2282	int nWS = 0; /* True if pOut ends with whitespace */
2184	2283	while( fossil_isspace(zIn[0]) ) zIn++;
2185	2284	while( zIn[0] ){
2186		- n = nextHtmlToken(zIn);
	2285	+ n = html_token_length(zIn);
2187	2286	if( zIn[0]=='<' && n>1 ){
2188	2287	int isCloseTag;
2189	2288	int eTag;
2190	2289	int eType;
2191	2290	char zTag[32];
		@@ -2197,11 +2296,11 @@
2197	2296	eTag = findTag(zTag);
2198	2297	eType = aMarkup[eTag].iType;
2199	2298	if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2200	2299	zIn += n;
2201	2300	while( zIn[0] ){
2202		- n = nextHtmlToken(zIn);
	2301	+ n = html_token_length(zIn);
2203	2302	if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2204	2303	zIn += n;
2205	2304	}
2206	2305	if( zIn[0]=='<' ) zIn += n;
2207	2306	continue;
2208	2307

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -2014,16 +2014,13 @@
2014	}
2015	free(renderer.aStack);
2016	}
2017
2018	/*
2019	** Get the next HTML token.
2020	**
2021	** z points to the start of a token. Return the number of
2022	** characters in that token.
2023	*/
2024	static int nextHtmlToken(const char *z){
2025	int n;
2026	char c;
2027	if( (c=z[0])=='<' ){
2028	n = htmlTagLength(z);
2029	if( n<=0 ) n = 1;
	@@ -2040,10 +2037,112 @@
2040	if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2041	}
2042	}
2043	return n;
2044	}






































































































2045
2046	/*
2047	** Attempt to reformat messy HTML to be easily readable by humans.
2048	**
2049	** * Try to keep lines less than 80 characters in length
	@@ -2062,11 +2161,11 @@
2062	int nPre = 0;
2063	int iCur = 0;
2064	int wantSpace = 0;
2065	int omitSpace = 1;
2066	while( zIn[0] ){
2067	n = nextHtmlToken(zIn);
2068	if( zIn[0]=='<' && n>1 ){
2069	int i, j;
2070	int isCloseTag;
2071	int eTag;
2072	int eType;
	@@ -2181,11 +2280,11 @@
2181	int seenText = 0; /* True after first non-whitespace seen */
2182	int nNL = 0; /* Number of \n characters at the end of pOut */
2183	int nWS = 0; /* True if pOut ends with whitespace */
2184	while( fossil_isspace(zIn[0]) ) zIn++;
2185	while( zIn[0] ){
2186	n = nextHtmlToken(zIn);
2187	if( zIn[0]=='<' && n>1 ){
2188	int isCloseTag;
2189	int eTag;
2190	int eType;
2191	char zTag[32];
	@@ -2197,11 +2296,11 @@
2197	eTag = findTag(zTag);
2198	eType = aMarkup[eTag].iType;
2199	if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2200	zIn += n;
2201	while( zIn[0] ){
2202	n = nextHtmlToken(zIn);
2203	if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2204	zIn += n;
2205	}
2206	if( zIn[0]=='<' ) zIn += n;
2207	continue;
2208

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -2014,16 +2014,13 @@
2014	}
2015	free(renderer.aStack);
2016	}
2017
2018	/*
2019	** Return the length, in bytes, of the HTML token that z is pointing to.



2020	*/
2021	int html_token_length(const char *z){
2022	int n;
2023	char c;
2024	if( (c=z[0])=='<' ){
2025	n = htmlTagLength(z);
2026	if( n<=0 ) n = 1;
	@@ -2040,10 +2037,112 @@
2037	if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2038	}
2039	}
2040	return n;
2041	}
2042
2043	/*
2044	** z points to someplace in the middle of HTML markup. Return the length
2045	** of the subtoken that starts on z.
2046	*/
2047	int html_subtoken_length(const char *z){
2048	int n;
2049	char c;
2050	c = z[0];
2051	if( fossil_isspace(c) ){
2052	for(n=1; z[n] && fossil_isspace(z[n]); n++){}
2053	return n;
2054	}
2055	if( c=='"' || c=='\'' ){
2056	for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
2057	if( z[n]==c ) n++;
2058	return n;
2059	}
2060	if( c=='>' ){
2061	return 0;
2062	}
2063	if( c=='=' ){
2064	return 1;
2065	}
2066	if( fossil_isalnum(c) || c=='/' ){
2067	for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
2068	return n;
2069	}
2070	return 1;
2071	}
2072
2073	/*
2074	** z points to an HTML markup token: <TAG ATTR=VALUE ...>
2075	** This routine looks for the VALUE associated with zAttr and returns
2076	** a pointer to the start of that value and sets *pLen to be the length
2077	** in bytes for the value. Or it returns NULL if no such attr exists.
2078	*/
2079	const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
2080	int i = 1;
2081	int n;
2082	int nAttr;
2083	int iMatchCnt = 0;
2084	assert( zMarkup[0]=='<' );
2085	assert( zMarkup[1]!=0 );
2086	n = html_subtoken_length(zMarkup+i);
2087	if( n==0 ) return 0;
2088	i += n;
2089	nAttr = (int)strlen(zAttr);
2090	while( 1 ){
2091	const char *zStart = zMarkup+i;
2092	n = html_subtoken_length(zStart);
2093	if( n==0 ) break;
2094	i += n;
2095	if( fossil_isspace(zStart[0]) ) continue;
2096	if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
2097	iMatchCnt = 1;
2098	}else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
2099	iMatchCnt = 2;
2100	}else if( iMatchCnt==2 ){
2101	if( (zStart[0]=='"' || zStart[0]=='\'') && zStart[n-1]==zStart[0] ){
2102	zStart++;
2103	n -= 2;
2104	}
2105	*pLen = n;
2106	return zStart;
2107	}else{
2108	iMatchCnt = 0;
2109	}
2110	}
2111	return 0;
2112	}
2113
2114	/*
2115	** COMMAND: test-html-tokenize
2116	**
2117	** Tokenize an HTML file. Return the offset and length and text of
2118	** each token - one token per line. Omit white-space tokens.
2119	*/
2120	void test_html_tokenize(void){
2121	Blob in;
2122	char *z;
2123	int i;
2124	int iOfst, n;
2125
2126	for(i=2; i<g.argc; i++){
2127	blob_read_from_file(&in, g.argv[i], ExtFILE);
2128	z = blob_str(&in);
2129	for(iOfst=0; z[iOfst]; iOfst+=n){
2130	n = html_token_length(z+iOfst);
2131	if( fossil_isspace(z[iOfst]) ) continue;
2132	fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2133	if( z[iOfst]=='<' && n>1 ){
2134	int j,k;
2135	for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2136	if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2137	fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2138	}
2139	}
2140	}
2141	blob_reset(&in);
2142	}
2143	}
2144
2145	/*
2146	** Attempt to reformat messy HTML to be easily readable by humans.
2147	**
2148	** * Try to keep lines less than 80 characters in length
	@@ -2062,11 +2161,11 @@
2161	int nPre = 0;
2162	int iCur = 0;
2163	int wantSpace = 0;
2164	int omitSpace = 1;
2165	while( zIn[0] ){
2166	n = html_token_length(zIn);
2167	if( zIn[0]=='<' && n>1 ){
2168	int i, j;
2169	int isCloseTag;
2170	int eTag;
2171	int eType;
	@@ -2181,11 +2280,11 @@
2280	int seenText = 0; /* True after first non-whitespace seen */
2281	int nNL = 0; /* Number of \n characters at the end of pOut */
2282	int nWS = 0; /* True if pOut ends with whitespace */
2283	while( fossil_isspace(zIn[0]) ) zIn++;
2284	while( zIn[0] ){
2285	n = html_token_length(zIn);
2286	if( zIn[0]=='<' && n>1 ){
2287	int isCloseTag;
2288	int eTag;
2289	int eType;
2290	char zTag[32];
	@@ -2197,11 +2296,11 @@
2296	eTag = findTag(zTag);
2297	eType = aMarkup[eTag].iType;
2298	if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2299	zIn += n;
2300	while( zIn[0] ){
2301	n = html_token_length(zIn);
2302	if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2303	zIn += n;
2304	}
2305	if( zIn[0]=='<' ) zIn += n;
2306	continue;
2307

Fossil SCM

Keyboard Shortcuts