Fossil SCM

Have the security-audit page analyze and display the content security policy.

drh 2019-08-19 17:18 trunk
Commit 9cf90a4f9d134b7fddb190e27c33c8d5d6f25fd25d37a2241e3fae02c715b5e9
+7
--- src/cgi.c
+++ src/cgi.c
@@ -144,10 +144,17 @@
144144
** Return a pointer to the CGI output blob.
145145
*/
146146
Blob *cgi_output_blob(void){
147147
return pContent;
148148
}
149
+
150
+/*
151
+** Return complete text of the output header
152
+*/
153
+const char *cgi_header(void){
154
+ return blob_str(&cgiContent[0]);
155
+}
149156
150157
/*
151158
** Combine the header and body of the CGI into a single string.
152159
*/
153160
static void cgi_combine_header_and_body(void){
154161
--- src/cgi.c
+++ src/cgi.c
@@ -144,10 +144,17 @@
144 ** Return a pointer to the CGI output blob.
145 */
146 Blob *cgi_output_blob(void){
147 return pContent;
148 }
 
 
 
 
 
 
 
149
150 /*
151 ** Combine the header and body of the CGI into a single string.
152 */
153 static void cgi_combine_header_and_body(void){
154
--- src/cgi.c
+++ src/cgi.c
@@ -144,10 +144,17 @@
144 ** Return a pointer to the CGI output blob.
145 */
146 Blob *cgi_output_blob(void){
147 return pContent;
148 }
149
150 /*
151 ** Return complete text of the output header
152 */
153 const char *cgi_header(void){
154 return blob_str(&cgiContent[0]);
155 }
156
157 /*
158 ** Combine the header and body of the CGI into a single string.
159 */
160 static void cgi_combine_header_and_body(void){
161
--- src/security_audit.c
+++ src/security_audit.c
@@ -32,10 +32,69 @@
3232
zTest++;
3333
}
3434
return 0;
3535
}
3636
37
+/*
38
+** Extract the content-security-policy from the reply header. Parse it
39
+** up into separate fields, and return a pointer to a null-terminated
40
+** array of pointers to strings, one entry for each field. Or return
41
+** a NULL pointer if no CSP could be located in the header.
42
+**
43
+** Memory to hold the returned array and of the strings is obtained from
44
+** a single memory allocation, which the caller should free to avoid a
45
+** memory leak.
46
+*/
47
+static char **parse_content_security_policy(void){
48
+ char **azCSP = 0;
49
+ int nCSP = 0;
50
+ const char *zHeader;
51
+ const char *zAll;
52
+ char *zCopy;
53
+ int nAll = 0;
54
+ int ii, jj, n, nx = 0;
55
+ int nSemi;
56
+
57
+ zHeader = cgi_header();
58
+ if( zHeader==0 ) return 0;
59
+ for(ii=0; zHeader[ii]; ii+=n){
60
+ n = html_token_length(zHeader+ii);
61
+ if( zHeader[ii]=='<'
62
+ && fossil_strnicmp(html_attribute(zHeader+ii,"http-equiv",&nx),
63
+ "Content-Security-Policy",23)==0
64
+ && nx==23
65
+ && (zAll = html_attribute(zHeader+ii,"content",&nAll))!=0
66
+ ){
67
+ for(jj=nSemi=0; jj<nAll; jj++){ if( zAll[jj]==';' ) nSemi++; }
68
+ azCSP = fossil_malloc( nAll+1 + (nSemi+2)*sizeof(char*) );
69
+ zCopy = &azCSP[nSemi+2];
70
+ memcpy(zCopy,zAll,nAll);
71
+ zCopy[nAll] = 0;
72
+ while( fossil_isspace(zCopy[0]) || zCopy[0]==';' ){ zCopy++; }
73
+ azCSP[0] = zCopy;
74
+ nCSP = 1;
75
+ for(jj=0; zCopy[jj]; jj++){
76
+ if( zCopy[jj]==';' ){
77
+ int k;
78
+ for(k=jj-1; k>0 && fossil_isspace(zCopy[k]); k--){ zCopy[k] = 0; }
79
+ zCopy[jj] = 0;
80
+ while( jj+1<nAll
81
+ && (fossil_isspace(zCopy[jj+1]) || zCopy[jj+1]==';')
82
+ ){
83
+ jj++;
84
+ }
85
+ assert( nCSP<nSemi+1 );
86
+ azCSP[nCSP++] = zCopy+jj;
87
+ }
88
+ }
89
+ assert( nCSP<=nSemi+2 );
90
+ azCSP[nCSP] = 0;
91
+ return azCSP;
92
+ }
93
+ }
94
+ return 0;
95
+}
3796
3897
/*
3998
** WEBPAGE: secaudit0
4099
**
41100
** Run a security audit of the current Fossil setup, looking
@@ -50,10 +109,11 @@
50109
const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
51110
const char *zPubPages; /* GLOB pattern for public pages */
52111
const char *zSelfCap; /* Capabilities of self-registered users */
53112
char *z;
54113
int n;
114
+ char **azCSP; /* Parsed content security policy */
55115
56116
login_check_credentials();
57117
if( !g.perm.Admin ){
58118
login_needed(0);
59119
return;
@@ -439,10 +499,30 @@
439499
}
440500
441501
@ <li><p> User capability summary:
442502
capability_summary();
443503
504
+
505
+ azCSP = parse_content_security_policy();
506
+ if( azCSP==0 ){
507
+ @ <li><p> WARNING: No Content Security Policy (CSP) is specified in the
508
+ @ header. Though not required, a strong CSP is recommended. Fossil will
509
+ @ automatically insert an appropriate CSP if you let it generate the
510
+ @ HTML <tt>&lt;head&gt;</tt> element by omitting <tt>&lt;body&gt;</tt>
511
+ @ from the header configuration in your customized skin.
512
+ @
513
+ }else{
514
+ int ii;
515
+ @ <li><p> Content Security Policy:
516
+ @ <ol type="a">
517
+ for(ii=0; azCSP[ii]; ii++){
518
+ @ <li>%h(azCSP[ii])
519
+ }
520
+ @ </ol>
521
+ }
522
+ fossil_free(azCSP);
523
+
444524
if( alert_enabled() ){
445525
@ <li><p> Email alert configuration summary:
446526
@ <table class="label-value">
447527
stats_for_email();
448528
@ </table>
449529
--- src/security_audit.c
+++ src/security_audit.c
@@ -32,10 +32,69 @@
32 zTest++;
33 }
34 return 0;
35 }
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
38 /*
39 ** WEBPAGE: secaudit0
40 **
41 ** Run a security audit of the current Fossil setup, looking
@@ -50,10 +109,11 @@
50 const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
51 const char *zPubPages; /* GLOB pattern for public pages */
52 const char *zSelfCap; /* Capabilities of self-registered users */
53 char *z;
54 int n;
 
55
56 login_check_credentials();
57 if( !g.perm.Admin ){
58 login_needed(0);
59 return;
@@ -439,10 +499,30 @@
439 }
440
441 @ <li><p> User capability summary:
442 capability_summary();
443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444 if( alert_enabled() ){
445 @ <li><p> Email alert configuration summary:
446 @ <table class="label-value">
447 stats_for_email();
448 @ </table>
449
--- src/security_audit.c
+++ src/security_audit.c
@@ -32,10 +32,69 @@
32 zTest++;
33 }
34 return 0;
35 }
36
/*
** Extract the content-security-policy from the reply header.  Parse it
** up into separate fields, and return a pointer to a null-terminated
** array of pointers to strings, one entry for each field.  Or return
** a NULL pointer if no CSP could be located in the header.
**
** The policy is taken from the "content" attribute of the first markup
** element in the header that has an "http-equiv" attribute whose value
** is 23 bytes long and matches "Content-Security-Policy" according to
** fossil_strnicmp().  Fields are separated by semicolons.  Whitespace
** on either side of a field is removed and empty fields are omitted,
** so the returned array may have zero entries if the policy is blank.
**
** Memory to hold the returned array and all of the strings is obtained
** from a single memory allocation, which the caller should free to
** avoid a memory leak.
*/
static char **parse_content_security_policy(void){
  char **azCSP;            /* Array of fields to be returned */
  int nCSP = 0;            /* Number of entries used in azCSP[] */
  const char *zHeader;     /* Complete text of the reply header */
  const char *zEquiv;      /* Value of the http-equiv attribute */
  const char *zAll;        /* Value of the content attribute */
  char *zCopy;             /* Writable copy of zAll[], held inside azCSP */
  int nAll;                /* Number of bytes in zAll[] */
  int nx;                  /* Number of bytes in zEquiv[] */
  int nSemi;               /* Number of semicolons in zAll[] */
  int ii, jj, n;

  zHeader = cgi_header();
  if( zHeader==0 ) return 0;
  for(ii=0; zHeader[ii]; ii+=n){
    n = html_token_length(zHeader+ii);

    /* Only markup can carry the policy.  A '<' that is the very last
    ** byte of the header is not handed to html_attribute(), which
    ** requires at least one byte after the '<'. */
    if( zHeader[ii]!='<' || zHeader[ii+1]==0 ) continue;

    nx = 0;
    zEquiv = html_attribute(zHeader+ii, "http-equiv", &nx);
    if( zEquiv==0 || nx!=23 ) continue;
    if( fossil_strnicmp(zEquiv, "Content-Security-Policy", 23)!=0 ) continue;
    nAll = 0;
    zAll = html_attribute(zHeader+ii, "content", &nAll);
    if( zAll==0 ) continue;
    if( nAll<0 ) nAll = 0;   /* Never pass a negative length to memcpy() */

    /* There can be no more than nSemi+1 fields.  Allocate that many
    ** slots plus one for the terminating NULL, followed by space for
    ** a writable, zero-terminated copy of the policy text. */
    for(jj=nSemi=0; jj<nAll; jj++){ if( zAll[jj]==';' ) nSemi++; }
    azCSP = fossil_malloc( nAll+1 + (nSemi+2)*sizeof(char*) );
    zCopy = (char*)&azCSP[nSemi+2];
    memcpy(zCopy, zAll, nAll);
    zCopy[nAll] = 0;

    jj = 0;
    while( zCopy[jj] ){
      int iEnd;            /* One past the last byte of the field */

      /* Skip separators and whitespace in front of the next field */
      while( fossil_isspace(zCopy[jj]) || zCopy[jj]==';' ){ jj++; }
      if( zCopy[jj]==0 ) break;
      assert( nCSP<=nSemi );
      azCSP[nCSP++] = &zCopy[jj];

      /* Find the end of the field, then step past the semicolon
      ** before the terminator is written, since the terminator may
      ** land on top of that semicolon. */
      while( zCopy[jj] && zCopy[jj]!=';' ){ jj++; }
      iEnd = jj;
      if( zCopy[jj] ) jj++;

      /* Trim trailing whitespace.  The first byte of the field is
      ** known to be non-space, so this cannot back up past it. */
      while( fossil_isspace(zCopy[iEnd-1]) ){ iEnd--; }
      zCopy[iEnd] = 0;
    }
    azCSP[nCSP] = 0;
    return azCSP;
  }
  return 0;
}
96
97 /*
98 ** WEBPAGE: secaudit0
99 **
100 ** Run a security audit of the current Fossil setup, looking
@@ -50,10 +109,11 @@
109 const char *zAnonCap; /* Capabilities of user "anonymous" and "nobody" */
110 const char *zPubPages; /* GLOB pattern for public pages */
111 const char *zSelfCap; /* Capabilities of self-registered users */
112 char *z;
113 int n;
114 char **azCSP; /* Parsed content security policy */
115
116 login_check_credentials();
117 if( !g.perm.Admin ){
118 login_needed(0);
119 return;
@@ -439,10 +499,30 @@
499 }
500
501 @ <li><p> User capability summary:
502 capability_summary();
503
504
505 azCSP = parse_content_security_policy();
506 if( azCSP==0 ){
507 @ <li><p> WARNING: No Content Security Policy (CSP) is specified in the
508 @ header. Though not required, a strong CSP is recommended. Fossil will
509 @ automatically insert an appropriate CSP if you let it generate the
510 @ HTML <tt>&lt;head&gt;</tt> element by omitting <tt>&lt;body&gt;</tt>
511 @ from the header configuration in your customized skin.
512 @
513 }else{
514 int ii;
515 @ <li><p> Content Security Policy:
516 @ <ol type="a">
517 for(ii=0; azCSP[ii]; ii++){
518 @ <li>%h(azCSP[ii])
519 }
520 @ </ol>
521 }
522 fossil_free(azCSP);
523
524 if( alert_enabled() ){
525 @ <li><p> Email alert configuration summary:
526 @ <table class="label-value">
527 stats_for_email();
528 @ </table>
529
+107 -8
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -2014,16 +2014,13 @@
20142014
}
20152015
free(renderer.aStack);
20162016
}
20172017
20182018
/*
2019
-** Get the next HTML token.
2020
-**
2021
-** z points to the start of a token. Return the number of
2022
-** characters in that token.
2019
+** Return the length, in bytes, of the HTML token that z is pointing to.
20232020
*/
2024
-static int nextHtmlToken(const char *z){
2021
+int html_token_length(const char *z){
20252022
int n;
20262023
char c;
20272024
if( (c=z[0])=='<' ){
20282025
n = htmlTagLength(z);
20292026
if( n<=0 ) n = 1;
@@ -2040,10 +2037,112 @@
20402037
if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
20412038
}
20422039
}
20432040
return n;
20442041
}
2042
+
2043
+/*
2044
+** z points to someplace in the middle of HTML markup. Return the length
2045
+** of the subtoken that starts on z.
2046
+*/
2047
+int html_subtoken_length(const char *z){
2048
+ int n;
2049
+ char c;
2050
+ c = z[0];
2051
+ if( fossil_isspace(c) ){
2052
+ for(n=1; z[n] && fossil_isspace(z[n]); n++){}
2053
+ return n;
2054
+ }
2055
+ if( c=='"' || c=='\'' ){
2056
+ for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
2057
+ if( z[n]==c ) n++;
2058
+ return n;
2059
+ }
2060
+ if( c=='>' ){
2061
+ return 0;
2062
+ }
2063
+ if( c=='=' ){
2064
+ return 1;
2065
+ }
2066
+ if( fossil_isalnum(c) || c=='/' ){
2067
+ for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
2068
+ return n;
2069
+ }
2070
+ return 1;
2071
+}
2072
+
2073
+/*
2074
+** z points to an HTML markup token: <TAG ATTR=VALUE ...>
2075
+** This routine looks for the VALUE associated with zAttr and returns
2076
+** a pointer to the start of that value and sets *pLen to be the length
2077
+** in bytes for the value. Or it returns NULL if no such attr exists.
2078
+*/
2079
+const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
2080
+ int i = 1;
2081
+ int n;
2082
+ int nAttr;
2083
+ int iMatchCnt = 0;
2084
+ assert( zMarkup[0]=='<' );
2085
+ assert( zMarkup[1]!=0 );
2086
+ n = html_subtoken_length(zMarkup+i);
2087
+ if( n==0 ) return 0;
2088
+ i += n;
2089
+ nAttr = (int)strlen(zAttr);
2090
+ while( 1 ){
2091
+ const char *zStart = zMarkup+i;
2092
+ n = html_subtoken_length(zStart);
2093
+ if( n==0 ) break;
2094
+ i += n;
2095
+ if( fossil_isspace(zStart[0]) ) continue;
2096
+ if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
2097
+ iMatchCnt = 1;
2098
+ }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
2099
+ iMatchCnt = 2;
2100
+ }else if( iMatchCnt==2 ){
2101
+ if( (zStart[0]=='"' || zStart[0]=='\'') && zStart[n-1]==zStart[0] ){
2102
+ zStart++;
2103
+ n -= 2;
2104
+ }
2105
+ *pLen = n;
2106
+ return zStart;
2107
+ }else{
2108
+ iMatchCnt = 0;
2109
+ }
2110
+ }
2111
+ return 0;
2112
+}
2113
+
2114
+/*
2115
+** COMMAND: test-html-tokenize
2116
+**
2117
+** Tokenize an HTML file. Return the offset and length and text of
2118
+** each token - one token per line. Omit white-space tokens.
2119
+*/
2120
+void test_html_tokenize(void){
2121
+ Blob in;
2122
+ char *z;
2123
+ int i;
2124
+ int iOfst, n;
2125
+
2126
+ for(i=2; i<g.argc; i++){
2127
+ blob_read_from_file(&in, g.argv[i], ExtFILE);
2128
+ z = blob_str(&in);
2129
+ for(iOfst=0; z[iOfst]; iOfst+=n){
2130
+ n = html_token_length(z+iOfst);
2131
+ if( fossil_isspace(z[iOfst]) ) continue;
2132
+ fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2133
+ if( z[iOfst]=='<' && n>1 ){
2134
+ int j,k;
2135
+ for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2136
+ if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2137
+ fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2138
+ }
2139
+ }
2140
+ }
2141
+ blob_reset(&in);
2142
+ }
2143
+}
20452144
20462145
/*
20472146
** Attempt to reformat messy HTML to be easily readable by humans.
20482147
**
20492148
** * Try to keep lines less than 80 characters in length
@@ -2062,11 +2161,11 @@
20622161
int nPre = 0;
20632162
int iCur = 0;
20642163
int wantSpace = 0;
20652164
int omitSpace = 1;
20662165
while( zIn[0] ){
2067
- n = nextHtmlToken(zIn);
2166
+ n = html_token_length(zIn);
20682167
if( zIn[0]=='<' && n>1 ){
20692168
int i, j;
20702169
int isCloseTag;
20712170
int eTag;
20722171
int eType;
@@ -2181,11 +2280,11 @@
21812280
int seenText = 0; /* True after first non-whitespace seen */
21822281
int nNL = 0; /* Number of \n characters at the end of pOut */
21832282
int nWS = 0; /* True if pOut ends with whitespace */
21842283
while( fossil_isspace(zIn[0]) ) zIn++;
21852284
while( zIn[0] ){
2186
- n = nextHtmlToken(zIn);
2285
+ n = html_token_length(zIn);
21872286
if( zIn[0]=='<' && n>1 ){
21882287
int isCloseTag;
21892288
int eTag;
21902289
int eType;
21912290
char zTag[32];
@@ -2197,11 +2296,11 @@
21972296
eTag = findTag(zTag);
21982297
eType = aMarkup[eTag].iType;
21992298
if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
22002299
zIn += n;
22012300
while( zIn[0] ){
2202
- n = nextHtmlToken(zIn);
2301
+ n = html_token_length(zIn);
22032302
if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
22042303
zIn += n;
22052304
}
22062305
if( zIn[0]=='<' ) zIn += n;
22072306
continue;
22082307
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -2014,16 +2014,13 @@
2014 }
2015 free(renderer.aStack);
2016 }
2017
2018 /*
2019 ** Get the next HTML token.
2020 **
2021 ** z points to the start of a token. Return the number of
2022 ** characters in that token.
2023 */
2024 static int nextHtmlToken(const char *z){
2025 int n;
2026 char c;
2027 if( (c=z[0])=='<' ){
2028 n = htmlTagLength(z);
2029 if( n<=0 ) n = 1;
@@ -2040,10 +2037,112 @@
2040 if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2041 }
2042 }
2043 return n;
2044 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2045
2046 /*
2047 ** Attempt to reformat messy HTML to be easily readable by humans.
2048 **
2049 ** * Try to keep lines less than 80 characters in length
@@ -2062,11 +2161,11 @@
2062 int nPre = 0;
2063 int iCur = 0;
2064 int wantSpace = 0;
2065 int omitSpace = 1;
2066 while( zIn[0] ){
2067 n = nextHtmlToken(zIn);
2068 if( zIn[0]=='<' && n>1 ){
2069 int i, j;
2070 int isCloseTag;
2071 int eTag;
2072 int eType;
@@ -2181,11 +2280,11 @@
2181 int seenText = 0; /* True after first non-whitespace seen */
2182 int nNL = 0; /* Number of \n characters at the end of pOut */
2183 int nWS = 0; /* True if pOut ends with whitespace */
2184 while( fossil_isspace(zIn[0]) ) zIn++;
2185 while( zIn[0] ){
2186 n = nextHtmlToken(zIn);
2187 if( zIn[0]=='<' && n>1 ){
2188 int isCloseTag;
2189 int eTag;
2190 int eType;
2191 char zTag[32];
@@ -2197,11 +2296,11 @@
2197 eTag = findTag(zTag);
2198 eType = aMarkup[eTag].iType;
2199 if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2200 zIn += n;
2201 while( zIn[0] ){
2202 n = nextHtmlToken(zIn);
2203 if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2204 zIn += n;
2205 }
2206 if( zIn[0]=='<' ) zIn += n;
2207 continue;
2208
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -2014,16 +2014,13 @@
2014 }
2015 free(renderer.aStack);
2016 }
2017
2018 /*
2019 ** Return the length, in bytes, of the HTML token that z is pointing to.
 
 
 
2020 */
2021 int html_token_length(const char *z){
2022 int n;
2023 char c;
2024 if( (c=z[0])=='<' ){
2025 n = htmlTagLength(z);
2026 if( n<=0 ) n = 1;
@@ -2040,10 +2037,112 @@
2037 if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2038 }
2039 }
2040 return n;
2041 }
2042
/*
** z points to someplace in the middle of HTML markup.  Return the length
** in bytes of the subtoken that starts on z.
**
** Subtokens are:
**
**    *  A run of whitespace
**    *  A quoted string, beginning with " or ' and extending through the
**       matching quote.  If there is no matching quote, the subtoken
**       stops just before the next '>' or at the end of the string.
**    *  A single '=' character
**    *  A word that begins with an alphanumeric or '/' and continues
**       with alphanumerics, '-', and '_'
**    *  Any other single character
**
** Return 0 if z points at the '>' that ends the markup or at the
** zero terminator of the string, so that callers which loop until
** the result is 0 never step past the end of the input.
*/
int html_subtoken_length(const char *z){
  int n;
  char c;
  c = z[0];
  if( c==0 || c=='>' ){
    return 0;
  }
  if( fossil_isspace(c) ){
    for(n=1; z[n] && fossil_isspace(z[n]); n++){}
    return n;
  }
  if( c=='"' || c=='\'' ){
    for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
    if( z[n]==c ) n++;    /* Include the closing quote, when there is one */
    return n;
  }
  if( c=='=' ){
    return 1;
  }
  if( fossil_isalnum(c) || c=='/' ){
    for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
    return n;
  }
  return 1;
}
2072
/*
** z points to an HTML markup token:  <TAG ATTR=VALUE ...>
** This routine looks for the VALUE associated with zAttr and returns
** a pointer to the start of that value and sets *pLen to be the length
** in bytes for the value.  Or it returns NULL if no such attr exists.
**
** The returned pointer refers to text inside of zMarkup.  It is not
** zero-terminated; use *pLen to find the end.  If VALUE is enclosed in
** a matching pair of " or ' characters, the quotes are not part of the
** result.  Otherwise the result is the single subtoken that follows
** the '='.  *pLen is left unchanged when NULL is returned.
**
** The attribute name is compared against zAttr using fossil_strnicmp().
** The search stops at the '>' that closes the markup or at the end of
** the string, whichever comes first.
*/
const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
  int i = 1;           /* Offset of the next subtoken within zMarkup[] */
  int n;               /* Length of the current subtoken */
  int nAttr;           /* Length of zAttr[] */
  int iMatchCnt = 0;   /* 0: nothing yet.  1: seen ATTR.  2: seen ATTR= */
  assert( zMarkup[0]=='<' );
  if( zMarkup[1]==0 ) return 0;    /* Nothing follows the '<' */

  /* Skip over the tag name */
  n = html_subtoken_length(zMarkup+i);
  if( n==0 ) return 0;
  i += n;
  nAttr = (int)strlen(zAttr);
  while( zMarkup[i]!=0 ){
    const char *zStart = zMarkup+i;
    n = html_subtoken_length(zStart);
    if( n==0 ) break;
    i += n;
    if( fossil_isspace(zStart[0]) ) continue;
    if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
      iMatchCnt = 1;
    }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
      iMatchCnt = 2;
    }else if( iMatchCnt==2 ){
      /* Strip the quotes only when both an opening and a closing quote
      ** are present.  The n>=2 test keeps a lone quote character from
      ** being counted as both, which would give a length of -1. */
      if( n>=2
       && (zStart[0]=='"' || zStart[0]=='\'')
       && zStart[n-1]==zStart[0]
      ){
        zStart++;
        n -= 2;
      }
      *pLen = n;
      return zStart;
    }else{
      iMatchCnt = 0;
    }
  }
  return 0;
}
2113
2114 /*
2115 ** COMMAND: test-html-tokenize
2116 **
2117 ** Tokenize an HTML file. Return the offset and length and text of
2118 ** each token - one token per line. Omit white-space tokens.
2119 */
2120 void test_html_tokenize(void){
2121 Blob in;
2122 char *z;
2123 int i;
2124 int iOfst, n;
2125
2126 for(i=2; i<g.argc; i++){
2127 blob_read_from_file(&in, g.argv[i], ExtFILE);
2128 z = blob_str(&in);
2129 for(iOfst=0; z[iOfst]; iOfst+=n){
2130 n = html_token_length(z+iOfst);
2131 if( fossil_isspace(z[iOfst]) ) continue;
2132 fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2133 if( z[iOfst]=='<' && n>1 ){
2134 int j,k;
2135 for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2136 if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2137 fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2138 }
2139 }
2140 }
2141 blob_reset(&in);
2142 }
2143 }
2144
2145 /*
2146 ** Attempt to reformat messy HTML to be easily readable by humans.
2147 **
2148 ** * Try to keep lines less than 80 characters in length
@@ -2062,11 +2161,11 @@
2161 int nPre = 0;
2162 int iCur = 0;
2163 int wantSpace = 0;
2164 int omitSpace = 1;
2165 while( zIn[0] ){
2166 n = html_token_length(zIn);
2167 if( zIn[0]=='<' && n>1 ){
2168 int i, j;
2169 int isCloseTag;
2170 int eTag;
2171 int eType;
@@ -2181,11 +2280,11 @@
2280 int seenText = 0; /* True after first non-whitespace seen */
2281 int nNL = 0; /* Number of \n characters at the end of pOut */
2282 int nWS = 0; /* True if pOut ends with whitespace */
2283 while( fossil_isspace(zIn[0]) ) zIn++;
2284 while( zIn[0] ){
2285 n = html_token_length(zIn);
2286 if( zIn[0]=='<' && n>1 ){
2287 int isCloseTag;
2288 int eTag;
2289 int eType;
2290 char zTag[32];
@@ -2197,11 +2296,11 @@
2296 eTag = findTag(zTag);
2297 eType = aMarkup[eTag].iType;
2298 if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2299 zIn += n;
2300 while( zIn[0] ){
2301 n = html_token_length(zIn);
2302 if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2303 zIn += n;
2304 }
2305 if( zIn[0]=='<' ) zIn += n;
2306 continue;
2307

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button