Fossil SCM

fossil-scm / src / wikiformat.c
Blame History Raw 2995 lines
1
/*
2
** Copyright (c) 2007 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
**   drh@hwaci.com
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file contains code to do formatting of wiki text.
19
*/
20
#include "config.h"
21
#include <assert.h>
22
#include "wikiformat.h"
23
24
#if INTERFACE
25
/*
** Flag bits selecting the wiki transformations to apply.  Each flag
** is a distinct bit so that several of them can be OR-ed together.
*/
#define WIKI_HTMLONLY       0x0001  /* Only HTML markup is processed, no wiki */
#define WIKI_INLINE         0x0002  /* Do not wrap the output in <p>..</p> */
/* available for reuse:     0x0004  --- formerly WIKI_NOBLOCK */
#define WIKI_BUTTONS        0x0008  /* Sub-menu buttons are permitted */
#define WIKI_NOBADLINKS     0x0010  /* Broken hyperlinks are ignored */
#define WIKI_LINKSONLY      0x0020  /* No markup; only hyperlinks are decorated */
#define WIKI_NEWLINE        0x0040  /* Honor \n: break the line at every \n */
#define WIKI_MARKDOWNLINKS  0x0080  /* Hyperlinks are resolved as in markdown */
#define WIKI_SAFE           0x0100  /* Result must be safe for embedding */
#define WIKI_TARGET_BLANK   0x0200  /* Hyperlinks open in a new window */
#define WIKI_NOBRACKET      0x0400  /* No extra [..] around hyperlinks */
#define WIKI_ADMIN          0x0800  /* g.perm.Hyperlink is ignored */
#define WIKI_MARK           0x1000  /* Wrap problems in <mark>..</mark> */
41
42
/*
** Bits of the result mask returned by wiki_convert.  The low byte
** reports what kind of content was rendered; the 0x0100 and higher
** bits report problems that were encountered.
*/
#define RENDER_LINK       0x0001  /* At least one hyperlink was rendered */
#define RENDER_ENTITY     0x0002  /* At least one HTML entity (ex: &lt;) */
#define RENDER_TAG        0x0004  /* At least one HTML tag */
#define RENDER_BLOCKTAG   0x0008  /* At least one HTML block tag (ex: <p>) */
#define RENDER_BLOCK      0x0010  /* Block wiki (paragraphs, etc.) */
#define RENDER_MARK       0x0020  /* Output contains <mark>..</mark> */
#define RENDER_BADLINK    0x0100  /* Hyperlink with bad syntax was seen */
#define RENDER_BADTARGET  0x0200  /* Hyperlink with a bad target */
#define RENDER_BADTAG     0x0400  /* Bad HTML tag or bad tag syntax */
#define RENDER_BADENTITY  0x0800  /* Bad HTML entity syntax */
#define RENDER_BADHTML    0x1000  /* Bad HTML was seen */
#define RENDER_ERROR      0x8000  /* Any other kind of error */
/* Composite values: */
/* RENDER_ANYERROR is the union of RENDER_BADLINK, RENDER_BADTARGET,
** RENDER_BADTAG, RENDER_BADENTITY, RENDER_BADHTML and RENDER_ERROR. */
#define RENDER_ANYERROR   0x9f00  /* Mask matching every error bit */
59
60
#endif /* INTERFACE */
61
62
63
/*
** The complete set of markup attributes that are recognized.
**
** Each value is also the index of the matching row in aAttribute[],
** so the numbering is spelled out explicitly.  Index 0 is not used by
** any attribute; findAttr() returns 0 for an unknown name.
*/
enum allowed_attr_t {
  ATTR_ALIGN       = 1,
  ATTR_ALT         = 2,
  ATTR_BGCOLOR     = 3,
  ATTR_BORDER      = 4,
  ATTR_CELLPADDING = 5,
  ATTR_CELLSPACING = 6,
  ATTR_CLASS       = 7,
  ATTR_CLEAR       = 8,
  ATTR_COLOR       = 9,
  ATTR_COLSPAN     = 10,
  ATTR_COMPACT     = 11,
  ATTR_FACE        = 12,
  ATTR_HEIGHT      = 13,
  ATTR_HREF        = 14,
  ATTR_HSPACE      = 15,
  ATTR_ID          = 16,
  ATTR_LINKS       = 17,
  ATTR_NAME        = 18,
  ATTR_ROWSPAN     = 19,
  ATTR_SIZE        = 20,
  ATTR_SRC         = 21,
  ATTR_START       = 22,
  ATTR_STYLE       = 23,
  ATTR_TARGET      = 24,
  ATTR_TITLE       = 25,
  ATTR_TYPE        = 26,
  ATTR_VALIGN      = 27,
  ATTR_VALUE       = 28,
  ATTR_VSPACE      = 29,
  ATTR_WIDTH       = 30
};
98
99
/*
** One mask bit per attribute.  The bits are assigned consecutively in
** the same order as the names appear in this list.
*/
enum amsk_t {
  AMSK_ALIGN       = 1 << 0,
  AMSK_ALT         = 1 << 1,
  AMSK_BGCOLOR     = 1 << 2,
  AMSK_BORDER      = 1 << 3,
  AMSK_CELLPADDING = 1 << 4,
  AMSK_CELLSPACING = 1 << 5,
  AMSK_CLASS       = 1 << 6,
  AMSK_CLEAR       = 1 << 7,
  AMSK_COLOR       = 1 << 8,
  AMSK_COLSPAN     = 1 << 9,
  AMSK_COMPACT     = 1 << 10,
  AMSK_FACE        = 1 << 11,
  AMSK_HEIGHT      = 1 << 12,
  AMSK_HREF        = 1 << 13,
  AMSK_HSPACE      = 1 << 14,
  AMSK_ID          = 1 << 15,
  AMSK_LINKS       = 1 << 16,
  AMSK_NAME        = 1 << 17,
  AMSK_ROWSPAN     = 1 << 18,
  AMSK_SIZE        = 1 << 19,
  AMSK_SRC         = 1 << 20,
  AMSK_START       = 1 << 21,
  AMSK_STYLE       = 1 << 22,
  AMSK_TARGET      = 1 << 23,
  AMSK_TITLE       = 1 << 24,
  AMSK_TYPE        = 1 << 25,
  AMSK_VALIGN      = 1 << 26,
  AMSK_VALUE       = 1 << 27,
  AMSK_VSPACE      = 1 << 28,
  AMSK_WIDTH       = 1 << 29
};
131
132
static const struct AllowedAttribute {
133
const char *zName;
134
unsigned int iMask;
135
} aAttribute[] = {
136
/* These indexes MUST line up with their
137
corresponding allowed_attr_t enum values.
138
*/
139
{ 0, 0 },
140
{ "align", AMSK_ALIGN },
141
{ "alt", AMSK_ALT },
142
{ "bgcolor", AMSK_BGCOLOR },
143
{ "border", AMSK_BORDER },
144
{ "cellpadding", AMSK_CELLPADDING },
145
{ "cellspacing", AMSK_CELLSPACING },
146
{ "class", AMSK_CLASS },
147
{ "clear", AMSK_CLEAR },
148
{ "color", AMSK_COLOR },
149
{ "colspan", AMSK_COLSPAN },
150
{ "compact", AMSK_COMPACT },
151
{ "face", AMSK_FACE },
152
{ "height", AMSK_HEIGHT },
153
{ "href", AMSK_HREF },
154
{ "hspace", AMSK_HSPACE },
155
{ "id", AMSK_ID },
156
{ "links", AMSK_LINKS },
157
{ "name", AMSK_NAME },
158
{ "rowspan", AMSK_ROWSPAN },
159
{ "size", AMSK_SIZE },
160
{ "src", AMSK_SRC },
161
{ "start", AMSK_START },
162
{ "style", AMSK_STYLE },
163
{ "target", AMSK_TARGET },
164
{ "title", AMSK_TITLE },
165
{ "type", AMSK_TYPE },
166
{ "valign", AMSK_VALIGN },
167
{ "value", AMSK_VALUE },
168
{ "vspace", AMSK_VSPACE },
169
{ "width", AMSK_WIDTH },
170
};
171
172
/*
173
** Use binary search to locate a tag in the aAttribute[] table.
174
*/
175
static int findAttr(const char *z){
176
int i, c, first, last;
177
first = 1;
178
last = count(aAttribute) - 1;
179
while( first<=last ){
180
i = (first+last)/2;
181
c = fossil_strcmp(aAttribute[i].zName, z);
182
if( c==0 ){
183
return i;
184
}else if( c<0 ){
185
first = i+1;
186
}else{
187
last = i-1;
188
}
189
}
190
return 0;
191
}
192
193
194
195
/*
** Codes for every markup element that is allowed.
**
** MARKUP_INVALID must be zero.  All other codes must be listed in
** the alphabetical order of their tag names and numbered consecutively,
** because the value of MARKUP_XYZ has to equal the index of the <xyz>
** row in aMarkup[].  (The MARKUP_HTML5_XYZ codes are placed according
** to their tag name "xyz".)
*/
enum markup_t {
  MARKUP_INVALID = 0,

  /* a... */
  MARKUP_A, MARKUP_ABBR, MARKUP_ADDRESS,
  MARKUP_HTML5_ARTICLE, MARKUP_HTML5_ASIDE,

  /* b... */
  MARKUP_B, MARKUP_BIG, MARKUP_BLOCKQUOTE, MARKUP_BR,

  /* c... */
  MARKUP_CENTER, MARKUP_CITE, MARKUP_CODE, MARKUP_COL, MARKUP_COLGROUP,

  /* d... */
  MARKUP_DD, MARKUP_DEL, MARKUP_DETAILS, MARKUP_DFN, MARKUP_DIV,
  MARKUP_DL, MARKUP_DT,

  /* e... f... */
  MARKUP_EM,
  MARKUP_FONT, MARKUP_HTML5_FOOTER,

  /* h... */
  MARKUP_H1, MARKUP_H2, MARKUP_H3, MARKUP_H4, MARKUP_H5, MARKUP_H6,
  MARKUP_HTML5_HEADER, MARKUP_HR,

  /* i... k... l... */
  MARKUP_I, MARKUP_IMG, MARKUP_INS,
  MARKUP_KBD,
  MARKUP_LI,

  /* n... o... p... */
  MARKUP_HTML5_NAV, MARKUP_NOBR, MARKUP_NOWIKI,
  MARKUP_OL,
  MARKUP_P, MARKUP_PRE,

  /* s... */
  MARKUP_S, MARKUP_SAMP, MARKUP_HTML5_SECTION, MARKUP_SMALL, MARKUP_SPAN,
  MARKUP_STRIKE, MARKUP_STRONG, MARKUP_SUB, MARKUP_SUMMARY, MARKUP_SUP,

  /* t... */
  MARKUP_TABLE, MARKUP_TBODY, MARKUP_TD, MARKUP_TFOOT, MARKUP_TH,
  MARKUP_THEAD, MARKUP_TITLE, MARKUP_TR, MARKUP_TT,

  /* u... v... */
  MARKUP_U, MARKUP_UL,
  MARKUP_VAR, MARKUP_VERBATIM
};
272
273
/*
** Every markup element belongs to one of the following categories:
*/
#define MUTYPE_SINGLE     0x0001  /* <img>, <br>, or <hr> */
#define MUTYPE_BLOCK      0x0002  /* Starts a new paragraph.  ex: <p>, <h2> */
#define MUTYPE_FONT       0x0004  /* Font change.  ex: <b>, <font>, <sub> */
#define MUTYPE_LIST       0x0010  /* List: <ol>, <ul>, or <dl> */
#define MUTYPE_LI         0x0020  /* List item: <li>, <dd>, <dt> */
#define MUTYPE_TABLE      0x0040  /* <table> */
#define MUTYPE_TR         0x0080  /* <tr> */
#define MUTYPE_TD         0x0100  /* <td> or <th> */
#define MUTYPE_SPECIAL    0x0200  /* <nowiki> or <verbatim> */
#define MUTYPE_HYPERLINK  0x0400  /* <a> */

/* MUTYPE values for elements that require strictly nested end-tags.
** This is the same bit pattern as 0x0656. */
#define MUTYPE_Nested     (MUTYPE_BLOCK | MUTYPE_FONT | MUTYPE_LIST | \
                           MUTYPE_TABLE | MUTYPE_SPECIAL | MUTYPE_HYPERLINK)

/*
** Markup of these types must have an end tag.
*/
#define MUTYPE_STACK  (MUTYPE_BLOCK | MUTYPE_FONT | MUTYPE_LIST | MUTYPE_TABLE)

/*
** Markup of these types is allowed within "inline" text.
*/
#define MUTYPE_INLINE (MUTYPE_FONT | MUTYPE_HYPERLINK)
299
300
static const struct AllowedMarkup {
301
const char *zName; /* Name of the markup */
302
char iCode; /* The MARKUP_* code */
303
short int iType; /* The MUTYPE_* code */
304
int allowedAttr; /* Allowed attributes on this markup */
305
} aMarkup[] = {
306
{ 0, MARKUP_INVALID, 0, 0 },
307
{ "a", MARKUP_A, MUTYPE_HYPERLINK,
308
AMSK_HREF|AMSK_NAME|AMSK_CLASS|AMSK_TARGET|AMSK_STYLE|
309
AMSK_TITLE},
310
{ "abbr", MARKUP_ABBR, MUTYPE_FONT,
311
AMSK_ID|AMSK_CLASS|AMSK_STYLE|AMSK_TITLE },
312
{ "address", MARKUP_ADDRESS, MUTYPE_BLOCK, AMSK_STYLE },
313
{ "article", MARKUP_HTML5_ARTICLE, MUTYPE_BLOCK,
314
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
315
{ "aside", MARKUP_HTML5_ASIDE, MUTYPE_BLOCK,
316
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
317
{ "b", MARKUP_B, MUTYPE_FONT, AMSK_STYLE },
318
{ "big", MARKUP_BIG, MUTYPE_FONT, AMSK_STYLE },
319
{ "blockquote", MARKUP_BLOCKQUOTE, MUTYPE_BLOCK, AMSK_STYLE },
320
{ "br", MARKUP_BR, MUTYPE_SINGLE, AMSK_CLEAR },
321
{ "center", MARKUP_CENTER, MUTYPE_BLOCK, AMSK_STYLE },
322
{ "cite", MARKUP_CITE, MUTYPE_FONT, AMSK_STYLE },
323
{ "code", MARKUP_CODE, MUTYPE_FONT, AMSK_STYLE },
324
{ "col", MARKUP_COL, MUTYPE_SINGLE,
325
AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE },
326
{ "colgroup", MARKUP_COLGROUP, MUTYPE_BLOCK,
327
AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE},
328
{ "dd", MARKUP_DD, MUTYPE_LI, AMSK_STYLE },
329
{ "del", MARKUP_DEL, MUTYPE_FONT, AMSK_STYLE },
330
{ "details", MARKUP_DETAILS, MUTYPE_BLOCK,
331
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
332
{ "dfn", MARKUP_DFN, MUTYPE_FONT, AMSK_STYLE },
333
{ "div", MARKUP_DIV, MUTYPE_BLOCK,
334
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
335
{ "dl", MARKUP_DL, MUTYPE_LIST,
336
AMSK_COMPACT|AMSK_STYLE },
337
{ "dt", MARKUP_DT, MUTYPE_LI, AMSK_STYLE },
338
{ "em", MARKUP_EM, MUTYPE_FONT, AMSK_STYLE },
339
{ "font", MARKUP_FONT, MUTYPE_FONT,
340
AMSK_COLOR|AMSK_FACE|AMSK_SIZE|AMSK_STYLE },
341
{ "footer", MARKUP_HTML5_FOOTER, MUTYPE_BLOCK,
342
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
343
{ "h1", MARKUP_H1, MUTYPE_BLOCK,
344
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
345
{ "h2", MARKUP_H2, MUTYPE_BLOCK,
346
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
347
{ "h3", MARKUP_H3, MUTYPE_BLOCK,
348
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
349
{ "h4", MARKUP_H4, MUTYPE_BLOCK,
350
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
351
{ "h5", MARKUP_H5, MUTYPE_BLOCK,
352
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
353
{ "h6", MARKUP_H6, MUTYPE_BLOCK,
354
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
355
{ "header", MARKUP_HTML5_HEADER, MUTYPE_BLOCK,
356
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
357
{ "hr", MARKUP_HR, MUTYPE_SINGLE,
358
AMSK_ALIGN|AMSK_COLOR|AMSK_SIZE|AMSK_WIDTH|
359
AMSK_STYLE|AMSK_CLASS },
360
{ "i", MARKUP_I, MUTYPE_FONT, AMSK_STYLE },
361
{ "img", MARKUP_IMG, MUTYPE_SINGLE,
362
AMSK_ALIGN|AMSK_ALT|AMSK_BORDER|AMSK_HEIGHT|
363
AMSK_HSPACE|AMSK_SRC|AMSK_VSPACE|AMSK_WIDTH|AMSK_STYLE },
364
{ "ins", MARKUP_INS, MUTYPE_FONT, AMSK_STYLE },
365
{ "kbd", MARKUP_KBD, MUTYPE_FONT, AMSK_STYLE },
366
{ "li", MARKUP_LI, MUTYPE_LI,
367
AMSK_TYPE|AMSK_VALUE|AMSK_STYLE },
368
{ "nav", MARKUP_HTML5_NAV, MUTYPE_BLOCK,
369
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
370
{ "nobr", MARKUP_NOBR, MUTYPE_FONT, 0 },
371
{ "nowiki", MARKUP_NOWIKI, MUTYPE_SPECIAL, 0 },
372
{ "ol", MARKUP_OL, MUTYPE_LIST,
373
AMSK_START|AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE },
374
{ "p", MARKUP_P, MUTYPE_BLOCK,
375
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
376
{ "pre", MARKUP_PRE, MUTYPE_BLOCK, AMSK_STYLE },
377
{ "s", MARKUP_S, MUTYPE_FONT, AMSK_STYLE },
378
{ "samp", MARKUP_SAMP, MUTYPE_FONT, AMSK_STYLE },
379
{ "section", MARKUP_HTML5_SECTION, MUTYPE_BLOCK,
380
AMSK_ID|AMSK_CLASS|AMSK_STYLE },
381
{ "small", MARKUP_SMALL, MUTYPE_FONT, AMSK_STYLE },
382
{ "span", MARKUP_SPAN, MUTYPE_BLOCK,
383
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
384
{ "strike", MARKUP_STRIKE, MUTYPE_FONT, AMSK_STYLE },
385
{ "strong", MARKUP_STRONG, MUTYPE_FONT, AMSK_STYLE },
386
{ "sub", MARKUP_SUB, MUTYPE_FONT, AMSK_STYLE },
387
{ "summary", MARKUP_SUMMARY, MUTYPE_BLOCK,
388
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
389
{ "sup", MARKUP_SUP, MUTYPE_FONT, AMSK_STYLE },
390
{ "table", MARKUP_TABLE, MUTYPE_TABLE,
391
AMSK_ALIGN|AMSK_BGCOLOR|AMSK_BORDER|AMSK_CELLPADDING|
392
AMSK_CELLSPACING|AMSK_HSPACE|AMSK_VSPACE|AMSK_CLASS|
393
AMSK_STYLE },
394
{ "tbody", MARKUP_TBODY, MUTYPE_BLOCK,
395
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
396
{ "td", MARKUP_TD, MUTYPE_TD,
397
AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
398
AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
399
{ "tfoot", MARKUP_TFOOT, MUTYPE_BLOCK,
400
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
401
{ "th", MARKUP_TH, MUTYPE_TD,
402
AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
403
AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
404
{ "thead", MARKUP_THEAD, MUTYPE_BLOCK,
405
AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
406
{ "title", MARKUP_TITLE, MUTYPE_BLOCK, 0 },
407
{ "tr", MARKUP_TR, MUTYPE_TR,
408
AMSK_ALIGN|AMSK_BGCOLOR|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
409
{ "tt", MARKUP_TT, MUTYPE_FONT, AMSK_STYLE },
410
{ "u", MARKUP_U, MUTYPE_FONT, AMSK_STYLE },
411
{ "ul", MARKUP_UL, MUTYPE_LIST,
412
AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE },
413
{ "var", MARKUP_VAR, MUTYPE_FONT, AMSK_STYLE },
414
{ "verbatim", MARKUP_VERBATIM, MUTYPE_SPECIAL,
415
AMSK_ID|AMSK_TYPE },
416
};
417
418
void show_allowed_wiki_markup( void ){
419
int i; /* loop over allowedAttr */
420
for( i=1 ; i<=count(aMarkup) - 1 ; i++ ){
421
@ &lt;%s(aMarkup[i].zName)&gt;
422
}
423
}
424
425
/*
426
** Use binary search to locate a tag in the aMarkup[] table.
427
*/
428
static int findTag(const char *z){
429
int i, c, first, last;
430
first = 1;
431
last = count(aMarkup) - 1;
432
while( first<=last ){
433
i = (first+last)/2;
434
c = fossil_strcmp(aMarkup[i].zName, z);
435
if( c==0 ){
436
assert( aMarkup[i].iCode==i );
437
return i;
438
}else if( c<0 ){
439
first = i+1;
440
}else{
441
last = i-1;
442
}
443
}
444
return MARKUP_INVALID;
445
}
446
447
/*
** Kinds of token that the tokenizers report through *pTokenType
*/
#define TOKEN_MARKUP        1  /* <...> */
#define TOKEN_CHARACTER     2  /* A "&" or "<" that is not part of markup */
#define TOKEN_LINK          3  /* [...] */
#define TOKEN_PARAGRAPH     4  /* Blank lines */
#define TOKEN_NEWLINE       5  /* One single "\n" */
#define TOKEN_BUL_LI        6  /* "  *  " */
#define TOKEN_NUM_LI        7  /* "  #  " */
#define TOKEN_ENUM          8  /* "  \(?\d+[.)]?  " */
#define TOKEN_INDENT        9  /* "   " */
#define TOKEN_RAW           10 /* Output exactly (used when wiki-use-html==1) */
#define TOKEN_TEXT          11 /* Anything not covered above */
461
462
/*
** Renderer state flags.  They start at bit 16 because the lower 16
** bits are reserved for the WIKI_* flags.
*/
#define AT_NEWLINE          0x0010000  /* Positioned at the start of a line */
#define AT_PARAGRAPH        0x0020000  /* Positioned at the start of a paragraph */
#define ALLOW_WIKI          0x0040000  /* Wiki markup is allowed */
#define ALLOW_LINKS         0x0080000  /* [...] hyperlinks are allowed */
#define FONT_MARKUP_ONLY    0x0100000  /* Only MUTYPE_FONT markup is allowed */
#define IN_LIST             0x0200000  /* Inside a wiki <ul> or <ol> */
471
472
/*
473
** Current state of the rendering engine
474
*/
475
typedef struct Renderer Renderer;
476
struct Renderer {
477
Blob *pOut; /* Output appended to this blob */
478
int state; /* Flag that govern rendering */
479
int mRender; /* Mask of RENDER_* values to return */
480
unsigned renderFlags; /* Flags from the client */
481
int wikiList; /* Current wiki list type */
482
int inVerbatim; /* True in <verbatim> mode */
483
int preVerbState; /* Value of state prior to verbatim */
484
int wantAutoParagraph; /* True if a <p> is desired */
485
int inAutoParagraph; /* True if within an automatic paragraph */
486
int pikchrHtmlFlags; /* Flags for pikchr_to_html() */
487
const char *zVerbatimId; /* The id= attribute of <verbatim> */
488
int nStack; /* Number of elements on the stack */
489
int nAlloc; /* Space allocated for aStack */
490
struct sStack {
491
short iCode; /* Markup code */
492
short allowWiki; /* ALLOW_WIKI if wiki allowed before tag */
493
const char *zId; /* ID attribute or NULL */
494
} *aStack;
495
};
496
497
/*
** Return TRUE if HTML is to be the only markup language for wiki.
**
** The answer comes from the "wiki-use-html" setting (default 0).  The
** setting is read on the first call and the result is remembered in a
** static variable, on the assumption that the setting never changes
** afterwards.
*/
static int wikiUsesHtml(void){
  static int cached = -1;      /* Negative means "not yet looked up" */
  if( cached>=0 ) return cached;
  cached = db_get_boolean("wiki-use-html", 0);
  return cached;
}
509
510
/*
** z points to a "<" character.  Decide whether a well-formed markup
** element begins there.
**
** A well-formed element is "<", an optional "/", a name that starts
** with a letter and continues with letters, digits or "-", and then
** either "/>" immediately, or ">" or whitespace followed by anything
** up to the next ">" that is not inside a '...' or "..." string.
**
** Return the total number of characters in the element including the
** initial "<" and the terminating ">".  Return 0 if the text is not
** well-formed markup.
*/
int html_tag_length(const char *z){
  int i = 1;          /* Index into z[].  z[0] is the "<" */
  int quote = 0;      /* The open quote character, or 0 if none */

  if( z[i]=='/' ) i++;
  if( !fossil_isalpha(z[i]) ) return 0;
  do{ i++; }while( fossil_isalnum(z[i]) || z[i]=='-' );
  if( z[i]=='/' && z[i+1]=='>' ) return i+2;
  if( z[i]!='>' && !fossil_isspace(z[i]) ) return 0;
  for(; z[i]!=0; i++){
    int c = z[i];
    if( quote ){
      if( c==quote ) quote = 0;
    }else if( c=='>' ){
      break;
    }else if( c=='"' || c=='\'' ){
      quote = c;
    }
  }
  return z[i]=='>' ? i+1 : 0;
}
537
538
/*
** z points to a "\n" character.  If the run of whitespace that follows
** contains at least one more "\n" (in other words, if the newline is
** followed by one or more blank lines) return the number of characters
** up to and including the last "\n" of that run.  Otherwise return 0.
*/
static int paragraphBreakLength(const char *z){
  int iLast = 0;      /* Index of the last "\n" found after z[0] */
  int i;
  for(i=1; fossil_isspace(z[i]); i++){
    if( z[i]=='\n' ) iLast = i;
  }
  return iLast>0 ? iLast+1 : 0;
}
558
559
/*
560
** Return the number of characters until the next "interesting"
561
** characters.
562
**
563
** Interesting characters are:
564
**
565
** <
566
** &
567
** \n
568
** [
569
**
570
** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI.
571
** The "\n" is only considered interesting if the flags contains ALLOW_WIKI.
572
*/
573
static int textLength(const char *z, int flags){
574
const char *zReject;
575
if( flags & ALLOW_WIKI ){
576
zReject = "<&[\n";
577
}else if( flags & ALLOW_LINKS ){
578
zReject = "<&[";
579
}else{
580
zReject = "<&";
581
}
582
return strcspn(z, zReject);
583
}
584
585
/*
** z[0] must be "&".  Return true if z[] begins with an HTML character
** element: either "&#" followed by one or more digits and a ";", or
** "&" followed by one or more letters and a ";".
*/
static int isElement(const char *z){
  int i;
  assert( z[0]=='&' );
  if( z[1]!='#' ){
    i = 1;
    while( fossil_isalpha(z[i]) ) i++;
    return i>1 && z[i]==';';
  }
  i = 2;
  while( fossil_isdigit(z[i]) ) i++;
  return i>2 && z[i]==';';
}
599
600
/*
** Check whether z[] is the beginning of a wiki list item whose bullet
** character is listChar.
**
** A list item is leading whitespace, the bullet character, more
** whitespace, and then a non-whitespace character.  Both whitespace
** runs must have a width of at least 2, where a space counts as 1 and
** a tab counts as 2.
**
** Return the length of the bullet text (everything ahead of the item
** content) or 0 if z[] is not a list item.
*/
static int listItemLength(const char *z, const char listChar){
  int pos = 0;      /* Index into z[] */
  int width;        /* Width of the whitespace run being measured */

  for(width=0; z[pos]==' ' || z[pos]=='\t'; pos++){
    width += z[pos]=='\t' ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( z[pos]!=listChar ) return 0;
  pos++;
  for(width=0; z[pos]==' ' || z[pos]=='\t'; pos++){
    width += z[pos]=='\t' ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[pos]) ) return 0;
  return pos;
}
624
625
/*
** Check whether z[] is the beginning of an enumeration value.  Return
** the length of the bullet text if it is, or 0 if it is not.
**
** Syntax:
**    *  a tab or two or more spaces
**    *  one or more digits
**    *  optional "."
**    *  another tab or two or more spaces
**    *  a character that is not whitespace
*/
static int enumLength(const char *z){
  int pos = 0;      /* Index into z[] */
  int width;        /* Width of a whitespace run; a tab counts as 2 */
  int nDigit;       /* Number of digits seen */

  for(width=0; z[pos]==' ' || z[pos]=='\t'; pos++){
    width += z[pos]=='\t' ? 2 : 1;
  }
  if( width<2 ) return 0;
  for(nDigit=0; fossil_isdigit(z[pos]); pos++){
    nDigit++;
  }
  if( nDigit==0 ) return 0;
  if( z[pos]=='.' ) pos++;
  for(width=0; z[pos]==' ' || z[pos]=='\t'; pos++){
    width += z[pos]=='\t' ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[pos]) ) return 0;
  return pos;
}
660
661
/*
** Check whether z[] is the beginning of an indented paragraph: a run
** of spaces and tabs of width 2 or more (a tab counts as 2) followed
** by a character that is not whitespace.  Return the length of the
** indent if so, or 0 if not.
*/
static int indentLength(const char *z){
  int pos = 0;      /* Index into z[] */
  int width = 0;    /* Width of the leading whitespace */

  for(; z[pos]==' ' || z[pos]=='\t'; pos++){
    width += z[pos]=='\t' ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[pos]) ) return 0;
  return pos;
}
678
679
/*
** z[0] must be "[".  If a "]" occurs somewhere later in z[] then the
** text is a wiki hyperlink: return its length, counting both the "["
** and the first "]".  Return 0 if there is no closing "]".
*/
static int linkLength(const char *z){
  int n = 0;
  assert( z[0]=='[' );
  while( z[n]!=0 && z[n]!=']' ) n++;
  return z[n]==']' ? n+1 : 0;
}
693
694
/*
695
** Get the next wiki token.
696
**
697
** z points to the start of a token. Return the number of
698
** characters in that token. Write the token type into *pTokenType.
699
*/
700
static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){
701
int n;
702
if( z[0]=='<' ){
703
n = html_tag_length(z);
704
if( n>0 ){
705
p->mRender |= RENDER_TAG;
706
*pTokenType = TOKEN_MARKUP;
707
return n;
708
}else{
709
p->mRender |= RENDER_BADTAG;
710
*pTokenType = TOKEN_CHARACTER;
711
return 1;
712
}
713
}
714
if( z[0]=='&' ){
715
p->mRender |= RENDER_ENTITY;
716
if( (p->inVerbatim || !isElement(z)) ){
717
*pTokenType = TOKEN_CHARACTER;
718
return 1;
719
}
720
}
721
if( (p->state & ALLOW_WIKI)!=0 ){
722
if( z[0]=='\n' ){
723
n = paragraphBreakLength(z);
724
if( n>0 ){
725
*pTokenType = TOKEN_PARAGRAPH;
726
return n;
727
}else{
728
*pTokenType = TOKEN_NEWLINE;
729
return 1;
730
}
731
}
732
if( (p->state & AT_NEWLINE)!=0 && fossil_isspace(z[0]) ){
733
n = listItemLength(z, '*');
734
if( n>0 ){
735
*pTokenType = TOKEN_BUL_LI;
736
return n;
737
}
738
n = listItemLength(z, '#');
739
if( n>0 ){
740
*pTokenType = TOKEN_NUM_LI;
741
return n;
742
}
743
n = enumLength(z);
744
if( n>0 ){
745
*pTokenType = TOKEN_ENUM;
746
return n;
747
}
748
}
749
if( (p->state & AT_PARAGRAPH)!=0 && fossil_isspace(z[0]) ){
750
n = indentLength(z);
751
if( n>0 ){
752
*pTokenType = TOKEN_INDENT;
753
return n;
754
}
755
}
756
if( z[0]=='[' ){
757
if( (n = linkLength(z))>0 ){
758
*pTokenType = TOKEN_LINK;
759
return n;
760
}else if( p->state & WIKI_MARK ){
761
blob_append_string(p->pOut, "<mark>");
762
p->mRender |= RENDER_BADLINK|RENDER_MARK;
763
}else{
764
p->mRender |= RENDER_BADLINK;
765
}
766
}
767
}else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' ){
768
if( (n = linkLength(z))>0 ){
769
*pTokenType = TOKEN_LINK;
770
return n;
771
}else if( p->state & WIKI_MARK ){
772
blob_append_string(p->pOut, "<mark>");
773
p->mRender |= RENDER_BADLINK|RENDER_MARK;
774
}else{
775
p->mRender |= RENDER_BADLINK;
776
}
777
}
778
*pTokenType = TOKEN_TEXT;
779
return 1 + textLength(z+1, p->state);
780
}
781
782
/*
783
** Parse only Wiki links, return everything else as TOKEN_RAW.
784
**
785
** z points to the start of a token. Return the number of
786
** characters in that token. Write the token type into *pTokenType.
787
*/
788
static int nextRawToken(const char *z, Renderer *p, int *pTokenType){
789
int n;
790
if( z[0]=='[' ){
791
if( (n = linkLength(z))>0 ){
792
*pTokenType = TOKEN_LINK;
793
return n;
794
}else if( p->state & WIKI_MARK ){
795
blob_append_string(p->pOut, "<mark>");
796
p->mRender |= RENDER_BADLINK|RENDER_MARK;
797
}else{
798
p->mRender |= RENDER_BADLINK;
799
}
800
}
801
*pTokenType = TOKEN_RAW;
802
return 1 + textLength(z+1, p->state);
803
}
804
805
/*
** The result of parsing one markup element is stored in an instance
** of this structure.
*/
typedef struct ParsedMarkup ParsedMarkup;
struct ParsedMarkup {
  unsigned char endTag;   /* True for </...>, false for <...> */
  unsigned char iCode;    /* MARKUP_* code of the element */
  unsigned char nAttr;    /* Number of entries of aAttr[] in use */
  unsigned short iType;   /* MUTYPE_* category of the element */
  struct {
    unsigned char iACode;    /* ATTR_* code of the attribute */
    char *zValue;            /* Value of the attribute.  Might be NULL */
    char cTerm;              /* Character that originally ended zValue */
  } aAttr[10];
};
821
822
/*
823
** z[] is an HTML markup element - something that begins with '<'.
824
** Parse this element into the p structure.
825
**
826
** The content of z[] might be modified by converting characters
827
** to lowercase and by inserting some "\000" characters.
828
*/
829
static int parseMarkup(ParsedMarkup *p, char *z){
830
int i, j, c;
831
int iACode;
832
char *zValue;
833
int seen = 0;
834
char zTag[100];
835
836
if( z[1]=='/' ){
837
p->endTag = 1;
838
i = 2;
839
}else{
840
p->endTag = 0;
841
i = 1;
842
}
843
j = 0;
844
while( fossil_isalnum(z[i]) ){
845
if( j<(int)sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
846
i++;
847
}
848
zTag[j] = 0;
849
p->iCode = findTag(zTag);
850
p->iType = aMarkup[p->iCode].iType;
851
p->nAttr = 0;
852
c = 0;
853
if( z[i]=='-' ){
854
p->aAttr[0].iACode = iACode = ATTR_ID;
855
i++;
856
p->aAttr[0].zValue = &z[i];
857
while( fossil_isalnum(z[i]) ){ i++; }
858
p->aAttr[0].cTerm = c = z[i];
859
z[i++] = 0;
860
p->nAttr = 1;
861
if( c=='>' ) return 0;
862
}
863
while( fossil_isspace(z[i]) ){ i++; }
864
while( c!='>' && p->nAttr<8 && fossil_isalpha(z[i]) ){
865
int attrOk; /* True to preserve attribute. False to ignore it */
866
j = 0;
867
while( fossil_isalnum(z[i]) ){
868
if( j<(int)sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
869
i++;
870
}
871
zTag[j] = 0;
872
p->aAttr[p->nAttr].iACode = iACode = findAttr(zTag);
873
attrOk = iACode!=0 && (seen & aAttribute[iACode].iMask)==0;
874
while( fossil_isspace(z[i]) ){ z++; }
875
if( z[i]!='=' ){
876
p->aAttr[p->nAttr].zValue = 0;
877
p->aAttr[p->nAttr].cTerm = 0;
878
c = 0;
879
}else{
880
i++;
881
while( fossil_isspace(z[i]) ){ z++; }
882
if( z[i]=='"' ){
883
i++;
884
zValue = &z[i];
885
while( z[i] && z[i]!='"' ){ i++; }
886
}else if( z[i]=='\'' ){
887
i++;
888
zValue = &z[i];
889
while( z[i] && z[i]!='\'' ){ i++; }
890
}else{
891
zValue = &z[i];
892
while( !fossil_isspace(z[i]) && z[i]!='>' ){
893
if( z[i]=='\'' || z[i]=='"' ) attrOk = 0;
894
i++;
895
}
896
}
897
if( attrOk ){
898
p->aAttr[p->nAttr].zValue = zValue;
899
p->aAttr[p->nAttr].cTerm = c = z[i];
900
if( z[i]==0 ){
901
i--;
902
}else{
903
z[i] = 0;
904
}
905
}
906
i++;
907
}
908
if( attrOk ){
909
seen |= aAttribute[iACode].iMask;
910
p->nAttr++;
911
}
912
while( fossil_isspace(z[i]) ){ i++; }
913
if( z[i]==0 || z[i]=='>' || (z[i]=='/' && z[i+1]=='>') ) break;
914
}
915
return seen;
916
}
917
918
/*
919
** Render markup on the given blob.
920
*/
921
static void renderMarkup(Blob *pOut, ParsedMarkup *p){
922
int i;
923
if( p->endTag ){
924
blob_appendf(pOut, "</%s>", aMarkup[p->iCode].zName);
925
}else{
926
blob_appendf(pOut, "<%s", aMarkup[p->iCode].zName);
927
for(i=0; i<p->nAttr; i++){
928
blob_appendf(pOut, " %s", aAttribute[p->aAttr[i].iACode].zName);
929
if( p->aAttr[i].zValue ){
930
const char *zVal = p->aAttr[i].zValue;
931
if( p->aAttr[i].iACode==ATTR_SRC && zVal[0]=='/' ){
932
blob_appendf(pOut, "=\"%R%s\"", zVal);
933
}else{
934
blob_appendf(pOut, "=\"%s\"", zVal);
935
}
936
}
937
}
938
if (p->iType & MUTYPE_SINGLE){
939
blob_append_string(pOut, " /");
940
}
941
blob_append_char(pOut, '>');
942
}
943
}
944
945
/*
946
** When the markup was parsed, some "\000" may have been inserted.
947
** This routine restores to those "\000" values back to their
948
** original content.
949
*/
950
static void unparseMarkup(ParsedMarkup *p){
951
int i, n;
952
for(i=0; i<p->nAttr; i++){
953
char *z = p->aAttr[i].zValue;
954
if( z==0 ) continue;
955
if( p->aAttr[i].cTerm ){
956
n = strlen(z);
957
z[n] = p->aAttr[i].cTerm;
958
}
959
}
960
}
961
962
/*
963
** Return the value of attribute attrId. Return NULL if there is no
964
** ID attribute.
965
*/
966
static const char *attributeValue(ParsedMarkup *p, int attrId){
967
int i;
968
for(i=0; i<p->nAttr; i++){
969
if( p->aAttr[i].iACode==attrId ){
970
return p->aAttr[i].zValue;
971
}
972
}
973
return 0;
974
}
975
976
/*
977
** Return the ID attribute for markup. Return NULL if there is no
978
** ID attribute.
979
*/
980
static const char *markupId(ParsedMarkup *p){
981
return attributeValue(p, ATTR_ID);
982
}
983
984
/*
985
** Check markup pMarkup to see if it is a hyperlink with class "button"
986
** that is follows by simple text and an </a> only. Example:
987
**
988
** <a class="button" href="../index.wiki">Index</a>
989
**
990
** If the markup matches this pattern, and if the WIKI_BUTTONS flag was
991
** passed to wiki_convert(), then transform this link into a submenu
992
** button, skip the text, and set *pN equal to the total length of the
993
** text through the end of </a> and return true. If the markup does
994
** not match or if WIKI_BUTTONS is not set, then make no changes to *pN
995
** and return false.
996
*/
997
static int isButtonHyperlink(
998
Renderer *p, /* Renderer state */
999
ParsedMarkup *pMarkup, /* Potential button markup */
1000
const char *z, /* Complete text of Wiki */
1001
int *pN /* Characters of z[] consumed */
1002
){
1003
const char *zClass;
1004
const char *zHref;
1005
char *zTag;
1006
int i, j;
1007
if( (p->state & WIKI_BUTTONS)==0 ) return 0;
1008
zClass = attributeValue(pMarkup, ATTR_CLASS);
1009
if( zClass==0 ) return 0;
1010
if( fossil_strcmp(zClass, "button")!=0 ) return 0;
1011
zHref = attributeValue(pMarkup, ATTR_HREF);
1012
if( zHref==0 ) return 0;
1013
i = *pN;
1014
while( z[i] && z[i]!='<' ){ i++; }
1015
if( fossil_strnicmp(&z[i], "</a>",4)!=0 ) return 0;
1016
for(j=*pN; fossil_isspace(z[j]); j++){}
1017
zTag = mprintf("%.*s", i-j, &z[j]);
1018
j = (int)strlen(zTag);
1019
while( j>0 && fossil_isspace(zTag[j-1]) ){ j--; }
1020
if( j==0 ) return 0;
1021
style_submenu_element(zTag, "%s", zHref);
1022
*pN = i+4;
1023
return 1;
1024
}
1025
1026
/*
1027
** Pop a single element off of the stack. As the element is popped,
1028
** output its end tag if it is not a </div> tag.
1029
*/
1030
static void popStack(Renderer *p){
1031
if( p->nStack ){
1032
int iCode;
1033
p->nStack--;
1034
iCode = p->aStack[p->nStack].iCode;
1035
if( (iCode!=MARKUP_DIV || p->aStack[p->nStack].zId==0) && p->pOut ){
1036
blob_appendf(p->pOut, "</%s>", aMarkup[iCode].zName);
1037
}
1038
}
1039
}
1040
1041
/*
1042
** Push a new markup value onto the stack. Enlarge the stack
1043
** if necessary.
1044
*/
1045
static void pushStackWithId(Renderer *p, int elem, const char *zId, int w){
1046
if( p->nStack>=p->nAlloc ){
1047
p->nAlloc = p->nAlloc*2 + 100;
1048
p->aStack = fossil_realloc(p->aStack, p->nAlloc*sizeof(p->aStack[0]));
1049
}
1050
p->aStack[p->nStack].iCode = elem;
1051
p->aStack[p->nStack].zId = zId;
1052
p->aStack[p->nStack].allowWiki = w;
1053
p->nStack++;
1054
}
1055
/*
** Push markup code elem onto the stack with no ID and with the
** allowWiki field set to zero.
*/
static void pushStack(Renderer *p, int elem){
  const char *zNoId = 0;
  pushStackWithId(p, elem, zNoId, 0);
}
1058
1059
/*
1060
** Pop the stack until the top-most iTag element is removed.
1061
** If there is no iTag element on the stack, this routine
1062
** is a no-op.
1063
*/
1064
static void popStackToTag(Renderer *p, int iTag){
1065
int i;
1066
for(i=p->nStack-1; i>=0; i--){
1067
if( p->aStack[i].iCode!=iTag ) continue;
1068
if( p->aStack[i].zId ) continue;
1069
break;
1070
}
1071
if( i<0 ) return;
1072
while( p->nStack>i ){
1073
popStack(p);
1074
}
1075
}
1076
1077
/*
1078
** Attempt to find a find a tag of type iTag with id zId. Return -1
1079
** if not found. If found, return its stack level.
1080
*/
1081
static int findTagWithId(Renderer *p, int iTag, const char *zId){
1082
int i;
1083
assert( zId!=0 );
1084
for(i=p->nStack-1; i>=0; i--){
1085
if( p->aStack[i].iCode!=iTag ) continue;
1086
if( p->aStack[i].zId==0 ) continue;
1087
if( fossil_strcmp(zId, p->aStack[i].zId)!=0 ) continue;
1088
break;
1089
}
1090
return i;
1091
}
1092
1093
/*
1094
** Pop the stack until the top-most element of the stack
1095
** is an element that matches the type in iMask. Return
1096
** code of the markup element that is on left on top of the stack.
1097
** If the stack does not have an element
1098
** that matches iMask, then leave the stack unchanged and
1099
** return false (MARKUP_INVALID).
1100
*/
1101
static int backupToType(Renderer *p, int iMask){
1102
int i;
1103
for(i=p->nStack-1; i>=0; i--){
1104
if( aMarkup[p->aStack[i].iCode].iType & iMask ) break;
1105
}
1106
if( i<0 ) return 0;
1107
i++;
1108
while( p->nStack>i ){
1109
popStack(p);
1110
}
1111
return p->aStack[i-1].iCode;
1112
}
1113
1114
/*
1115
** Begin a new paragraph if that something that is needed.
1116
*/
1117
static void startAutoParagraph(Renderer *p){
1118
if( p->wantAutoParagraph==0 ) return;
1119
if( p->state & WIKI_LINKSONLY ) return;
1120
if( p->wikiList==MARKUP_OL || p->wikiList==MARKUP_UL ) return;
1121
blob_append_string(p->pOut, "<p>");
1122
p->wantAutoParagraph = 0;
1123
p->inAutoParagraph = 1;
1124
}
1125
1126
/*
1127
** End a paragraph if we are in one.
1128
*/
1129
static void endAutoParagraph(Renderer *p){
1130
if( p->inAutoParagraph ){
1131
p->inAutoParagraph = 0;
1132
}
1133
}
1134
1135
/*
1136
** If the input string corresponds to an existing baseline,
1137
** return true.
1138
*/
1139
static int is_valid_hname(const char *z){
1140
int n = strlen(z);
1141
if( n<4 || n>HNAME_MAX ) return 0;
1142
if( !validate16(z, n) ) return 0;
1143
return 1;
1144
}
1145
1146
/*
1147
** Return TRUE if a hash name corresponds to an artifact in this
1148
** repository.
1149
*/
1150
static int in_this_repo(const char *zUuid){
1151
static Stmt q;
1152
int rc;
1153
int n;
1154
char zU2[HNAME_MAX+1];
1155
db_static_prepare(&q,
1156
"SELECT 1 FROM blob WHERE uuid>=:u AND uuid<:u2"
1157
);
1158
db_bind_text(&q, ":u", zUuid);
1159
n = (int)strlen(zUuid);
1160
if( n>=(int)sizeof(zU2) ) n = sizeof(zU2)-1;
1161
memcpy(zU2, zUuid, n);
1162
zU2[n-1]++;
1163
zU2[n] = 0;
1164
db_bind_text(&q, ":u2", zU2);
1165
rc = db_step(&q);
1166
db_reset(&q);
1167
return rc==SQLITE_ROW;
1168
}
1169
1170
/*
1171
** zTarget is guaranteed to be a UUID. It might be the UUID of a ticket.
1172
** If it is, store in *pClosed a true or false depending on whether or not
1173
** the ticket is closed and return true. If zTarget
1174
** is not the UUID of a ticket, return false.
1175
*/
1176
int is_ticket(
1177
const char *zTarget, /* Ticket UUID */
1178
int *pClosed /* True if the ticket is closed */
1179
){
1180
static Stmt q;
1181
int n;
1182
int rc;
1183
char zLower[HNAME_MAX+1];
1184
char zUpper[HNAME_MAX+1];
1185
n = strlen(zTarget);
1186
memcpy(zLower, zTarget, n+1);
1187
canonical16(zLower, n+1);
1188
memcpy(zUpper, zLower, n+1);
1189
zUpper[n-1]++;
1190
if( !db_static_stmt_is_init(&q) ){
1191
char *zClosedExpr = db_get("ticket-closed-expr", "status='Closed'");
1192
db_static_prepare(&q,
1193
"SELECT %z FROM ticket "
1194
" WHERE tkt_uuid>=:lwr AND tkt_uuid<:upr",
1195
zClosedExpr /*safe-for-%s*/
1196
);
1197
}
1198
db_bind_text(&q, ":lwr", zLower);
1199
db_bind_text(&q, ":upr", zUpper);
1200
if( db_step(&q)==SQLITE_ROW ){
1201
rc = 1;
1202
*pClosed = db_column_int(&q, 0);
1203
}else{
1204
rc = 0;
1205
}
1206
db_reset(&q);
1207
return rc;
1208
}
1209
1210
/*
1211
** Return a pointer to the name part of zTarget (skipping the "wiki:" prefix
1212
** if there is one) if zTarget is a valid wiki page name. Return NULL if
1213
** zTarget names a page that does not exist.
1214
*/
1215
static const char *validWikiPageName(int mFlags, const char *zTarget){
1216
if( strncmp(zTarget, "wiki:", 5)==0
1217
&& wiki_name_is_wellformed((const unsigned char*)zTarget) ){
1218
return zTarget+5;
1219
}
1220
if( strcmp(zTarget, "Sandbox")==0 ) return zTarget;
1221
if( wiki_name_is_wellformed((const unsigned char *)zTarget)
1222
&& ((mFlags & WIKI_NOBADLINKS)==0 ||
1223
db_exists("SELECT 1 FROM tag WHERE tagname GLOB 'wiki-%q'"
1224
" AND (SELECT value FROM tagxref WHERE tagid=tag.tagid"
1225
" ORDER BY mtime DESC LIMIT 1) > 0", zTarget))
1226
){
1227
return zTarget;
1228
}
1229
return 0;
1230
}
1231
1232
/*
** Hash to which hyperlinks to wiki pages are redirected, or NULL when
** no override is in effect.
*/
static const char *wikiOverrideHash = 0;

/*
** Fossil-wiki hyperlinks to wiki pages should be overridden to the
** hash value supplied.  If the value is NULL, then the override is
** cancelled and all hyperlinks operate normally.
**
** Only the pointer is remembered; the string is not copied.
*/
void wiki_hyperlink_override(const char *zUuid){
  wikiOverrideHash = zUuid;
}
1242
1243
1244
/*
1245
** If links to wiki page zTarget should be redirected to some historical
1246
** version of that page, then return the hash of the historical version.
1247
** If no override is required, return NULL.
1248
*/
1249
static const char *wiki_is_overridden(const char *zTarget){
1250
if( wikiOverrideHash==0 ) return 0;
1251
/* The override should only happen if the override version is not the
1252
** latest version of the wiki page. */
1253
if( !db_exists(
1254
"SELECT 1 FROM tag, blob, tagxref AS xA, tagxref AS xB "
1255
" WHERE tag.tagname GLOB 'wiki-%q*'"
1256
" AND blob.uuid GLOB '%q'"
1257
" AND xA.tagid=tag.tagid AND xA.rid=blob.rid"
1258
" AND xB.tagid=tag.tagid AND xB.mtime>xA.mtime",
1259
zTarget, wikiOverrideHash
1260
) ){
1261
return 0;
1262
}
1263
return wikiOverrideHash;
1264
}
1265
1266
/*
1267
** Resolve a hyperlink. The zTarget argument is the content of the [...]
1268
** in the wiki. Append to the output string whatever text is appropriate
1269
** for opening the hyperlink. Write into zClose[0...nClose-1] text that will
1270
** close the markup.
1271
**
1272
** If this routine determines that no hyperlink should be generated, then
1273
** set zClose[0] to 0.
1274
**
1275
** Actually, this routine might or might not append the hyperlink, depending
1276
** on current rendering rules: specifically does the current user have
1277
** "History" permission.
1278
**
1279
** [http://fossil-scm.org/]
1280
** [https://fossil-scm.org/]
1281
** [ftp://fossil-scm.org/]
1282
** [mailto:[email protected]]
1283
**
1284
** [/path] -> Refers to the root of the Fossil hierarchy, not
1285
** the root of the URI domain
1286
**
1287
** [./relpath]
1288
** [../relpath]
1289
**
1290
** [#fragment]
1291
**
1292
** [0123456789abcdef]
1293
**
1294
** [WikiPageName]
1295
** [wiki:WikiPageName]
1296
**
1297
** [2010-02-27 07:13]
1298
**
1299
** [InterMap:Link] -> Interwiki link
1300
**
1301
** The return value is a mask of RENDER_* values indicating what happened.
1302
** Probably the return value is 0 on success and RENDER_BADTARGET or
1303
** RENDER_BADLINK if there are problems.
1304
*/
1305
int wiki_resolve_hyperlink(
1306
Blob *pOut, /* Write the HTML output here */
1307
int mFlags, /* Rendering option flags */
1308
const char *zTarget, /* Hyperlink target; text within [...] */
1309
char *zClose, /* Write hyperlink closing text here */
1310
int nClose, /* Bytes available in zClose[] */
1311
const char *zOrig, /* Complete document text */
1312
const char *zTitle /* Title of the link */
1313
){
1314
const char *zTerm = "</a>";
1315
const char *z;
1316
char *zExtra = 0;
1317
const char *zExtraNS = 0;
1318
char *zRemote = 0;
1319
int rc = 0;
1320
1321
if( zTitle ){
1322
zExtra = mprintf(" title='%h'", zTitle);
1323
zExtraNS = zExtra+1;
1324
}else if( mFlags & WIKI_TARGET_BLANK ){
1325
zExtra = mprintf(" target='_blank'");
1326
zExtraNS = zExtra+1;
1327
}
1328
assert( nClose>=20 );
1329
if( strncmp(zTarget, "http:", 5)==0
1330
|| strncmp(zTarget, "https:", 6)==0
1331
|| strncmp(zTarget, "ftp:", 4)==0
1332
|| strncmp(zTarget, "mailto:", 7)==0
1333
){
1334
blob_appendf(pOut, "<a href=\"%s\"%s>", zTarget, zExtra);
1335
}else if( zTarget[0]=='/' ){
1336
blob_appendf(pOut, "<a href=\"%R%h\"%s>", zTarget, zExtra);
1337
}else if( zTarget[0]=='.'
1338
&& (zTarget[1]=='/' || (zTarget[1]=='.' && zTarget[2]=='/'))
1339
&& (mFlags & WIKI_LINKSONLY)==0 ){
1340
blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1341
}else if( zTarget[0]=='#' ){
1342
blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1343
}else if( is_valid_hname(zTarget) ){
1344
int isClosed = 0;
1345
const char *zLB = (mFlags & WIKI_NOBRACKET)==0 ? "[" : "";
1346
if( strlen(zTarget)<=HNAME_MAX && is_ticket(zTarget, &isClosed) ){
1347
/* Special display processing for tickets. Display the hyperlink
1348
** as crossed out if the ticket is closed.
1349
*/
1350
if( isClosed ){
1351
if( g.perm.Hyperlink ){
1352
blob_appendf(pOut,
1353
"%z<span class=\"wikiTagCancelled\">%s",
1354
xhref(zExtraNS,"%R/info/%s",zTarget), zLB
1355
);
1356
zTerm = "]</span></a>";
1357
}else{
1358
blob_appendf(pOut,"<span class=\"wikiTagCancelled\">%s", zLB);
1359
zTerm = "]</span>";
1360
}
1361
}else{
1362
if( g.perm.Hyperlink ){
1363
blob_appendf(pOut,"%z%s", xhref(zExtraNS,"%R/info/%s", zTarget),zLB);
1364
zTerm = "]</a>";
1365
}else{
1366
blob_appendf(pOut, "%s", zLB);
1367
zTerm = "]";
1368
}
1369
}
1370
}else if( !in_this_repo(zTarget) ){
1371
if( (mFlags & (WIKI_LINKSONLY|WIKI_NOBADLINKS))!=0 ){
1372
zTerm = "";
1373
}else if( (mFlags & WIKI_MARK)!=0 ){
1374
blob_appendf(pOut, "<mark>%s", zLB);
1375
zTerm = "]</mark>";
1376
rc |= RENDER_MARK;
1377
}else{
1378
blob_appendf(pOut, "<span class=\"brokenlink\">%s", zLB);
1379
zTerm = "]</span>";
1380
}
1381
rc |= RENDER_BADTARGET;
1382
}else if( g.perm.Hyperlink || (mFlags & WIKI_ADMIN)!=0 ){
1383
blob_appendf(pOut, "%z%s",xhref(zExtraNS, "%R/info/%s", zTarget), zLB);
1384
zTerm = "]</a>";
1385
}else{
1386
zTerm = "";
1387
}
1388
if( zTerm[0]==']' && (mFlags & WIKI_NOBRACKET)!=0 ) zTerm++;
1389
}else if( (zRemote = interwiki_url(zTarget))!=0 ){
1390
blob_appendf(pOut, "<a href=\"%z\"%s>", zRemote, zExtra);
1391
zTerm = "</a>";
1392
}else if( (z = validWikiPageName(mFlags, zTarget))!=0 ){
1393
/* The link is to a valid wiki page name */
1394
const char *zOverride = wiki_is_overridden(zTarget);
1395
if( zOverride ){
1396
blob_appendf(pOut, "<a href=\"%R/info/%S\"%s>", zOverride, zExtra);
1397
}else{
1398
blob_appendf(pOut, "<a href=\"%R/wiki?name=%T\"%s>", z, zExtra);
1399
}
1400
}else if( strlen(zTarget)>=10 && fossil_isdigit(zTarget[0]) && zTarget[4]=='-'
1401
&& db_int(0, "SELECT datetime(%Q) NOT NULL", zTarget) ){
1402
/* Dates or date-and-times in ISO8601 resolve to a link to the
1403
** timeline for that date */
1404
blob_appendf(pOut, "<a href=\"%R/timeline?c=%T\"%s>", zTarget, zExtra);
1405
}else if( mFlags & WIKI_MARKDOWNLINKS ){
1406
/* If none of the above, and if rendering links for markdown, then
1407
** create a link to the literal text of the target */
1408
blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1409
}else if( mFlags & WIKI_MARK ){
1410
blob_appendf(pOut, "<mark>[");
1411
zTerm = "]</mark>";
1412
rc |= RENDER_BADTARGET|RENDER_MARK;
1413
}else if( zOrig && zTarget>=&zOrig[2]
1414
&& zTarget[-1]=='[' && !fossil_isspace(zTarget[-2]) ){
1415
/* If the hyperlink markup is not preceded by whitespace, then it
1416
** is probably a C-language subscript or similar, not really a
1417
** hyperlink. Just ignore it. */
1418
zTerm = "";
1419
}else if( (mFlags & (WIKI_NOBADLINKS|WIKI_LINKSONLY))!=0 ){
1420
/* Also ignore the link if various flags are set */
1421
zTerm = "";
1422
rc |= RENDER_BADTARGET;
1423
}else{
1424
blob_appendf(pOut, "<span class=\"brokenlink\">[%h]", zTarget);
1425
zTerm = "</span>";
1426
rc |= RENDER_BADTARGET;
1427
}
1428
if( zExtra ) fossil_free(zExtra);
1429
assert( (int)strlen(zTerm)<nClose );
1430
sqlite3_snprintf(nClose, zClose, "%s", zTerm);
1431
return rc;
1432
}
1433
1434
/*
1435
** Check zTarget to see if it looks like a valid hyperlink target.
1436
** Return true if it does seem valid and false if not.
1437
*/
1438
int wiki_valid_link_target(char *zTarget){
1439
char zClose[30];
1440
Blob notUsed;
1441
blob_init(&notUsed, 0, 0);
1442
wiki_resolve_hyperlink(&notUsed, WIKI_NOBADLINKS|WIKI_ADMIN,
1443
zTarget, zClose, sizeof(zClose)-1, 0, 0);
1444
blob_reset(&notUsed);
1445
return zClose[0]!=0;
1446
}
1447
1448
/*
1449
** Check to see if the given parsed markup is the correct
1450
** </verbatim> tag.
1451
*/
1452
static int endVerbatim(Renderer *p, ParsedMarkup *pMarkup){
1453
char *z;
1454
assert( p->inVerbatim );
1455
if( pMarkup->iCode!=MARKUP_VERBATIM ) return 0;
1456
if( !pMarkup->endTag ) return 0;
1457
if( p->zVerbatimId==0 ) return 1;
1458
if( pMarkup->nAttr!=1 ) return 0;
1459
z = pMarkup->aAttr[0].zValue;
1460
return fossil_strcmp(z, p->zVerbatimId)==0;
1461
}
1462
1463
/*
1464
** z[] points to the text that immediately follows markup of the form:
1465
**
1466
** <verbatim type='pikchr ...'>
1467
**
1468
** zClass is the argument to "type". This routine will process the
1469
** Pikchr text through the next matching </verbatim> (or until end-of-file)
1470
** and append the resulting SVG output onto p. It then returns the
1471
** number of bytes of text processed, including the closing </verbatim>.
1472
*/
1473
static int wiki_process_pikchr(Renderer *p, char *z, const char *zClass){
1474
ParsedMarkup m; /* Parsed closing tag */
1475
int i = 0; /* For looping over z[] in search of </verbatim> */
1476
int iRet = 0; /* Value to return */
1477
int atEnd = 0; /* True if se have found the </verbatim> */
1478
int nMarkup = 0; /* Length of a markup we are checking */
1479
1480
/* Search for the closing </verbatim> tag */
1481
while( z[i]!=0 ){
1482
char *zEnd = strchr(z+i, '<');
1483
if( zEnd==0 ){
1484
i += (int)strlen(z+i);
1485
iRet = i;
1486
break;
1487
}
1488
nMarkup = html_tag_length(zEnd);
1489
if( nMarkup<11 || fossil_strnicmp(zEnd, "</verbatim", 10)!=0 ){
1490
i = (int)(zEnd - z) + 1;
1491
continue;
1492
}
1493
(void)parseMarkup(&m, z+i);
1494
atEnd = endVerbatim(p, &m);
1495
unparseMarkup(&m);
1496
if( atEnd ){
1497
iRet = i + nMarkup;
1498
break;
1499
}
1500
i++;
1501
}
1502
1503
/* The Pikchr source text should be i character in length and iRet is
1504
** i plus the number of bytes in the </verbatim>. Generate the reply.
1505
*/
1506
assert( strncmp(zClass,"pikchr",6)==0 );
1507
zClass += 6;
1508
while( fossil_isspace(zClass[0]) ) zClass++;
1509
blob_append(p->pOut, "<p>", 3);
1510
pikchr_to_html(p->pOut, z, i, zClass, (int)strlen(zClass));
1511
blob_append(p->pOut, "</p>\n", 5);
1512
return iRet;
1513
}
1514
1515
/*
1516
** Return the MUTYPE for the top of the stack.
1517
*/
1518
static int stackTopType(Renderer *p){
1519
if( p->nStack<=0 ) return 0;
1520
return aMarkup[p->aStack[p->nStack-1].iCode].iType;
1521
}
1522
1523
/*
1524
** Convert the wiki in z[] into html in the renderer p. The
1525
** renderer has already been initialized.
1526
**
1527
** This routine will probably modify the content of z[].
1528
*/
1529
static void wiki_render(Renderer *p, char *z){
1530
int tokenType;
1531
ParsedMarkup markup;
1532
int n;
1533
int wikiHtmlOnly = (p->state & (WIKI_HTMLONLY | WIKI_LINKSONLY))!=0;
1534
int linksOnly = (p->state & WIKI_LINKSONLY)!=0;
1535
char *zOrig = z;
1536
1537
/* Make sure the attribute constants and names still align
1538
** following changes in the attribute list. */
1539
assert( fossil_strcmp(aAttribute[ATTR_WIDTH].zName, "width")==0 );
1540
1541
while( z[0] ){
1542
if( wikiHtmlOnly ){
1543
n = nextRawToken(z, p, &tokenType);
1544
}else{
1545
n = nextWikiToken(z, p, &tokenType);
1546
}
1547
p->state &= ~(AT_NEWLINE|AT_PARAGRAPH);
1548
switch( tokenType ){
1549
case TOKEN_PARAGRAPH: {
1550
if( p->wikiList ){
1551
popStackToTag(p, p->wikiList);
1552
p->wikiList = 0;
1553
}
1554
endAutoParagraph(p);
1555
blob_append_string(p->pOut, "\n\n");
1556
p->wantAutoParagraph = 1;
1557
p->state |= AT_PARAGRAPH|AT_NEWLINE;
1558
break;
1559
}
1560
case TOKEN_NEWLINE: {
1561
if( p->renderFlags & WIKI_NEWLINE ){
1562
blob_append_string(p->pOut, "<br>\n");
1563
}else{
1564
blob_append_string(p->pOut, "\n");
1565
}
1566
p->state |= AT_NEWLINE;
1567
break;
1568
}
1569
case TOKEN_BUL_LI: {
1570
p->mRender |= RENDER_BLOCK;
1571
if( p->wikiList!=MARKUP_UL ){
1572
if( p->wikiList ){
1573
popStackToTag(p, p->wikiList);
1574
}
1575
endAutoParagraph(p);
1576
pushStack(p, MARKUP_UL);
1577
blob_append_string(p->pOut, "<ul>");
1578
p->wikiList = MARKUP_UL;
1579
}
1580
popStackToTag(p, MARKUP_LI);
1581
startAutoParagraph(p);
1582
pushStack(p, MARKUP_LI);
1583
blob_append_string(p->pOut, "<li>");
1584
break;
1585
}
1586
case TOKEN_NUM_LI: {
1587
p->mRender |= RENDER_BLOCK;
1588
if( p->wikiList!=MARKUP_OL ){
1589
if( p->wikiList ){
1590
popStackToTag(p, p->wikiList);
1591
}
1592
endAutoParagraph(p);
1593
pushStack(p, MARKUP_OL);
1594
blob_append_string(p->pOut, "<ol>");
1595
p->wikiList = MARKUP_OL;
1596
}
1597
popStackToTag(p, MARKUP_LI);
1598
startAutoParagraph(p);
1599
pushStack(p, MARKUP_LI);
1600
blob_append_string(p->pOut, "<li>");
1601
break;
1602
}
1603
case TOKEN_ENUM: {
1604
p->mRender |= RENDER_BLOCK;
1605
if( p->wikiList!=MARKUP_OL ){
1606
if( p->wikiList ){
1607
popStackToTag(p, p->wikiList);
1608
}
1609
endAutoParagraph(p);
1610
pushStack(p, MARKUP_OL);
1611
blob_append_string(p->pOut, "<ol>");
1612
p->wikiList = MARKUP_OL;
1613
}
1614
popStackToTag(p, MARKUP_LI);
1615
startAutoParagraph(p);
1616
pushStack(p, MARKUP_LI);
1617
blob_appendf(p->pOut, "<li value=\"%d\">", atoi(z));
1618
break;
1619
}
1620
case TOKEN_INDENT: {
1621
p->mRender |= RENDER_BLOCK;
1622
assert( p->wikiList==0 );
1623
pushStack(p, MARKUP_BLOCKQUOTE);
1624
blob_append_string(p->pOut, "<blockquote>");
1625
p->wantAutoParagraph = 0;
1626
p->wikiList = MARKUP_BLOCKQUOTE;
1627
break;
1628
}
1629
case TOKEN_CHARACTER: {
1630
startAutoParagraph(p);
1631
if( p->state & WIKI_MARK ){
1632
blob_append_string(p->pOut, "<mark>");
1633
p->mRender |= RENDER_MARK;
1634
}
1635
if( z[0]=='<' ){
1636
p->mRender |= RENDER_BADTAG;
1637
blob_append_string(p->pOut, "&lt;");
1638
}else if( z[0]=='&' ){
1639
p->mRender |= RENDER_BADENTITY;
1640
blob_append_string(p->pOut, "&amp;");
1641
}
1642
if( p->state & WIKI_MARK ){
1643
if( fossil_isalnum(z[1]) || (z[1]=='/' && fossil_isalnum(z[2])) ){
1644
int kk;
1645
for(kk=2; fossil_isalnum(z[kk]); kk++){}
1646
blob_append(p->pOut, &z[1], kk-1);
1647
n = kk;
1648
}
1649
blob_append_string(p->pOut, "</mark>");
1650
}
1651
break;
1652
}
1653
case TOKEN_LINK: {
1654
char *zTarget;
1655
char *zDisplay = 0;
1656
int i, j;
1657
int savedState;
1658
char zClose[20];
1659
char cS1 = 0;
1660
int iS1 = 0;
1661
1662
startAutoParagraph(p);
1663
p->mRender |= RENDER_LINK;
1664
zTarget = &z[1];
1665
for(i=1; z[i] && z[i]!=']'; i++){
1666
if( z[i]=='|' && zDisplay==0 ){
1667
zDisplay = &z[i+1];
1668
for(j=i; j>0 && fossil_isspace(z[j-1]); j--){}
1669
iS1 = j;
1670
cS1 = z[j];
1671
z[j] = 0;
1672
}
1673
}
1674
z[i] = 0;
1675
if( zDisplay==0 ){
1676
zDisplay = zTarget + interwiki_removable_prefix(zTarget);
1677
}else{
1678
while( fossil_isspace(*zDisplay) ) zDisplay++;
1679
}
1680
p->mRender |= wiki_resolve_hyperlink(p->pOut, p->state,
1681
zTarget, zClose, sizeof(zClose), zOrig, 0);
1682
if( linksOnly || zClose[0]==0 || p->inVerbatim ){
1683
if( cS1 ) z[iS1] = cS1;
1684
if( zClose[0]!=']' ){
1685
blob_appendf(p->pOut, "[%h]%s", zTarget, zClose);
1686
}else{
1687
blob_appendf(p->pOut, "%h%s", zTarget, zClose);
1688
}
1689
}else{
1690
savedState = p->state;
1691
p->state &= ~ALLOW_WIKI;
1692
p->state |= FONT_MARKUP_ONLY;
1693
wiki_render(p, zDisplay);
1694
p->state = savedState;
1695
blob_append(p->pOut, zClose, -1);
1696
}
1697
break;
1698
}
1699
case TOKEN_TEXT: {
1700
int i;
1701
for(i=0; i<n && fossil_isspace(z[i]); i++){}
1702
if( i<n ) startAutoParagraph(p);
1703
blob_append(p->pOut, z, n);
1704
break;
1705
}
1706
case TOKEN_RAW: {
1707
if( linksOnly ){
1708
htmlize_to_blob(p->pOut, z, n);
1709
}else{
1710
blob_append(p->pOut, z, n);
1711
}
1712
break;
1713
}
1714
case TOKEN_MARKUP: {
1715
const char *zId;
1716
int iDiv;
1717
int mAttr = parseMarkup(&markup, z);
1718
1719
/* Convert <title> to <h1 align='center'> */
1720
if( markup.iCode==MARKUP_TITLE && !p->inVerbatim ){
1721
markup.iCode = MARKUP_H1;
1722
markup.nAttr = 1;
1723
markup.aAttr[0].iACode = AMSK_ALIGN;
1724
markup.aAttr[0].zValue = "center";
1725
markup.aAttr[0].cTerm = 0;
1726
}
1727
1728
/* Markup of the form </div id=ID> where there is a matching
1729
** ID somewhere on the stack. Exit any contained verbatim.
1730
** Pop the stack up to the matching <div>. Discard the </div>
1731
*/
1732
if( markup.iCode==MARKUP_DIV && markup.endTag &&
1733
(zId = markupId(&markup))!=0 &&
1734
(iDiv = findTagWithId(p, MARKUP_DIV, zId))>=0
1735
){
1736
if( p->inVerbatim ){
1737
p->inVerbatim = 0;
1738
p->state = p->preVerbState;
1739
blob_append_string(p->pOut, "</pre>");
1740
}
1741
while( p->nStack>iDiv+1 ) popStack(p);
1742
if( p->aStack[iDiv].allowWiki ){
1743
p->state |= ALLOW_WIKI;
1744
}else{
1745
p->state &= ~ALLOW_WIKI;
1746
}
1747
assert( p->nStack==iDiv+1 );
1748
p->nStack--;
1749
}else
1750
1751
/* If within <verbatim id=ID> ignore everything other than
1752
** </verbatim id=ID> and the </dev id=ID2> above.
1753
*/
1754
if( p->inVerbatim ){
1755
if( endVerbatim(p, &markup) ){
1756
p->inVerbatim = 0;
1757
p->state = p->preVerbState;
1758
blob_append_string(p->pOut, "</pre>");
1759
}else{
1760
unparseMarkup(&markup);
1761
blob_append_string(p->pOut, "&lt;");
1762
n = 1;
1763
}
1764
}else
1765
1766
/* Render invalid markup literally. The markup appears in the
1767
** final output as plain text.
1768
*/
1769
if( markup.iCode==MARKUP_INVALID ){
1770
p->mRender |= RENDER_BADTAG;
1771
unparseMarkup(&markup);
1772
startAutoParagraph(p);
1773
if( p->state & WIKI_MARK ){
1774
p->mRender |= RENDER_MARK;
1775
blob_append_string(p->pOut, "<mark>");
1776
htmlize_to_blob(p->pOut, z, n);
1777
blob_append_string(p->pOut, "</mark>");
1778
}else{
1779
blob_append_string(p->pOut, "&lt;");
1780
htmlize_to_blob(p->pOut, z+1, n-1);
1781
}
1782
}else
1783
1784
/* If the markup is not font-change markup ignore it if the
1785
** font-change-only flag is set.
1786
*/
1787
if( (markup.iType&MUTYPE_FONT)==0 && (p->state & FONT_MARKUP_ONLY)!=0 ){
1788
/* Do nothing */
1789
}else
1790
1791
if( markup.iCode==MARKUP_NOWIKI ){
1792
if( markup.endTag ){
1793
p->state |= ALLOW_WIKI;
1794
}else{
1795
p->state &= ~ALLOW_WIKI;
1796
}
1797
}else
1798
1799
/* Generate end-tags */
1800
if( markup.endTag ){
1801
popStackToTag(p, markup.iCode);
1802
}else
1803
1804
/* Push <div> markup onto the stack together with the id=ID attribute.
1805
*/
1806
if( markup.iCode==MARKUP_DIV && (mAttr & ATTR_ID)!=0 ){
1807
pushStackWithId(p, markup.iCode, markupId(&markup),
1808
(p->state & ALLOW_WIKI)!=0);
1809
}else
1810
1811
/* Enter <verbatim> processing. With verbatim enabled, all other
1812
** markup other than the corresponding end-tag with the same ID is
1813
** ignored.
1814
*/
1815
if( markup.iCode==MARKUP_VERBATIM ){
1816
int ii; /*, vAttrDidAppend=0;*/
1817
const char *zClass = 0;
1818
p->zVerbatimId = 0;
1819
p->inVerbatim = 1;
1820
p->preVerbState = p->state;
1821
p->state &= ~ALLOW_WIKI;
1822
for(ii=0; ii<markup.nAttr; ii++){
1823
if( markup.aAttr[ii].iACode == ATTR_ID ){
1824
p->zVerbatimId = markup.aAttr[ii].zValue;
1825
}else if( markup.aAttr[ii].iACode==ATTR_TYPE ){
1826
zClass = markup.aAttr[ii].zValue;
1827
}else if( markup.aAttr[ii].iACode==ATTR_LINKS
1828
&& !is_false(markup.aAttr[ii].zValue) ){
1829
p->state |= ALLOW_LINKS;
1830
}
1831
}
1832
endAutoParagraph(p);
1833
if( zClass==0 ){
1834
blob_append_string(p->pOut, "<pre class='verbatim'>");
1835
}else if( strncmp(zClass,"pikchr",6)==0 &&
1836
(fossil_isspace(zClass[6]) || zClass[6]==0) ){
1837
n += wiki_process_pikchr(p, z+n, zClass);
1838
p->inVerbatim = 0;
1839
p->state = p->preVerbState;
1840
}else{
1841
blob_appendf(p->pOut, "<pre name='code' class='%h'>",
1842
zClass);
1843
}
1844
p->wantAutoParagraph = 0;
1845
}else
1846
if( markup.iType==MUTYPE_LI ){
1847
if( backupToType(p, MUTYPE_LIST)==0 ){
1848
endAutoParagraph(p);
1849
pushStack(p, MARKUP_UL);
1850
blob_append_string(p->pOut, "<ul>");
1851
}
1852
pushStack(p, MARKUP_LI);
1853
renderMarkup(p->pOut, &markup);
1854
}else
1855
if( markup.iType==MUTYPE_TR ){
1856
if( backupToType(p, MUTYPE_TABLE) ){
1857
pushStack(p, MARKUP_TR);
1858
renderMarkup(p->pOut, &markup);
1859
}
1860
}else
1861
if( markup.iType==MUTYPE_TD ){
1862
if( backupToType(p, MUTYPE_TABLE|MUTYPE_TR) ){
1863
if( stackTopType(p)==MUTYPE_TABLE ){
1864
pushStack(p, MARKUP_TR);
1865
blob_append_string(p->pOut, "<tr>");
1866
}
1867
p->wantAutoParagraph = 0;
1868
pushStack(p, markup.iCode);
1869
renderMarkup(p->pOut, &markup);
1870
}
1871
}else
1872
if( markup.iType==MUTYPE_HYPERLINK ){
1873
if( !isButtonHyperlink(p, &markup, z, &n) ){
1874
popStackToTag(p, markup.iCode);
1875
startAutoParagraph(p);
1876
renderMarkup(p->pOut, &markup);
1877
pushStack(p, markup.iCode);
1878
}
1879
}else
1880
{
1881
if( markup.iType==MUTYPE_FONT ){
1882
startAutoParagraph(p);
1883
}else if( markup.iType==MUTYPE_BLOCK || markup.iType==MUTYPE_LIST ){
1884
p->mRender |= RENDER_BLOCKTAG;
1885
p->wantAutoParagraph = 0;
1886
}
1887
if( markup.iCode==MARKUP_HR
1888
|| markup.iCode==MARKUP_H1
1889
|| markup.iCode==MARKUP_H2
1890
|| markup.iCode==MARKUP_H3
1891
|| markup.iCode==MARKUP_H4
1892
|| markup.iCode==MARKUP_H5
1893
|| markup.iCode==MARKUP_P
1894
){
1895
endAutoParagraph(p);
1896
}
1897
if( (markup.iType & MUTYPE_STACK )!=0 ){
1898
pushStack(p, markup.iCode);
1899
}
1900
renderMarkup(p->pOut, &markup);
1901
}
1902
break;
1903
}
1904
}
1905
z += n;
1906
}
1907
}
1908
1909
/*
1910
** Transform the text in the pIn blob. Write the results
1911
** into the pOut blob. The pOut blob should already be
1912
** initialized. The output is merely appended to pOut.
1913
** If pOut is NULL, then the output is appended to the CGI
1914
** reply.
1915
**
1916
** Return a mask of RENDER_ flags indicating what happened.
1917
*/
1918
int wiki_convert(Blob *pIn, Blob *pOut, int flags){
1919
Renderer renderer;
1920
1921
memset(&renderer, 0, sizeof(renderer));
1922
renderer.renderFlags = flags;
1923
renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH|flags;
1924
if( flags & WIKI_INLINE ){
1925
renderer.wantAutoParagraph = 0;
1926
}else{
1927
renderer.wantAutoParagraph = 1;
1928
}
1929
if( wikiUsesHtml() ){
1930
renderer.state |= WIKI_HTMLONLY;
1931
}
1932
if( pOut ){
1933
renderer.pOut = pOut;
1934
}else{
1935
renderer.pOut = cgi_output_blob();
1936
}
1937
1938
blob_to_utf8_no_bom(pIn, 0);
1939
wiki_render(&renderer, blob_str(pIn));
1940
endAutoParagraph(&renderer);
1941
while( renderer.nStack ){
1942
popStack(&renderer);
1943
}
1944
blob_append_char(renderer.pOut, '\n');
1945
free(renderer.aStack);
1946
return renderer.mRender;
1947
}
1948
1949
/*
1950
** COMMAND: test-wiki-render
1951
**
1952
** Usage: %fossil test-wiki-render FILE [OPTIONS]
1953
**
1954
** Translate the input FILE from Fossil-wiki into HTML and write
1955
** the resulting HTML on standard output.
1956
**
1957
** Options:
1958
** --buttons Set the WIKI_BUTTONS flag
1959
** --dark-pikchr Render pikchrs in dark mode
1960
** --flow Render as text using comment_format
1961
** --htmlonly Set the WIKI_HTMLONLY flag
1962
** --inline Set the WIKI_INLINE flag
1963
** --linksonly Set the WIKI_LINKSONLY flag
1964
** -m TEXT Use TEXT in place of the content of FILE
1965
** --mark Add <mark>...</mark> around problems
1966
** --nobadlinks Set the WIKI_NOBADLINKS flag
1967
** --text Run the output through html_to_plaintext()
1968
** --type Break down the return code from wiki_convert()
1969
*/
1970
void test_wiki_render(void){
1971
Blob in, out;
1972
int flags = 0;
1973
int bText;
1974
int bFlow = 0;
1975
int showType = 0;
1976
int mType;
1977
const char *zIn;
1978
if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS;
1979
if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY;
1980
if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY;
1981
if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS;
1982
if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE;
1983
if( find_option("mark",0,0)!=0 ) flags |= WIKI_MARK;
1984
if( find_option("dark-pikchr",0,0)!=0 ){
1985
pikchr_to_html_add_flags( PIKCHR_PROCESS_DARK_MODE );
1986
}
1987
bText = find_option("text",0,0)!=0;
1988
bFlow = find_option("flow",0,0)!=0;
1989
showType = find_option("type",0,0)!=0;
1990
zIn = find_option("msg","m",1);
1991
db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
1992
verify_all_options();
1993
if( (zIn==0 && g.argc!=3) || (zIn!=0 && g.argc!=2) ) usage("FILE");
1994
blob_zero(&out);
1995
if( zIn ){
1996
blob_init(&in, zIn, -1);
1997
}else{
1998
blob_read_from_file(&in, g.argv[2], ExtFILE);
1999
}
2000
mType = wiki_convert(&in, &out, flags);
2001
if( bText ){
2002
Blob txt;
2003
int htot = HTOT_TRIM;
2004
if( terminal_is_vt100() ) htot |= HTOT_VT100;
2005
if( bFlow ) htot |= HTOT_FLOW;
2006
blob_init(&txt, 0, 0);
2007
html_to_plaintext(blob_str(&out),&txt, htot);
2008
blob_reset(&out);
2009
out = txt;
2010
}
2011
if( bFlow ){
2012
fossil_print(" ");
2013
comment_print(blob_str(&out), 0, 3, terminal_get_width(80)-3,
2014
get_comment_format());
2015
}else{
2016
blob_write_to_file(&out, "-");
2017
}
2018
if( showType ){
2019
fossil_print("%.*c\nResult Codes:", terminal_get_width(80)-1, '*');
2020
if( mType & RENDER_LINK ) fossil_print(" LINK");
2021
if( mType & RENDER_ENTITY ) fossil_print(" ENTITY");
2022
if( mType & RENDER_TAG ) fossil_print(" TAG");
2023
if( mType & RENDER_BLOCKTAG ) fossil_print(" BLOCKTAG");
2024
if( mType & RENDER_BLOCK ) fossil_print(" BLOCK");
2025
if( mType & RENDER_MARK ) fossil_print(" MARK");
2026
if( mType & RENDER_BADLINK ) fossil_print(" BADLINK");
2027
if( mType & RENDER_BADTARGET ) fossil_print(" BADTARGET");
2028
if( mType & RENDER_BADTAG ) fossil_print(" BADTAG");
2029
if( mType & RENDER_BADENTITY ) fossil_print(" BADENTITY");
2030
if( mType & RENDER_BADHTML ) fossil_print(" BADHTML");
2031
if( mType & RENDER_ERROR ) fossil_print(" ERROR");
2032
fossil_print("\n");
2033
}
2034
}
2035
2036
/*
2037
** COMMAND: test-markdown-render
2038
**
2039
** Usage: %fossil test-markdown-render FILE ...
2040
**
2041
** Render markdown in FILE as HTML on stdout.
2042
** Options:
2043
**
2044
** --dark-pikchr Render pikchrs in dark mode
2045
** --lint-footnotes Print stats for footnotes-related issues
2046
** --safe Restrict the output to use only "safe" HTML
2047
** --text Run the output through html_to_plaintext().
2048
*/
2049
void test_markdown_render(void){
2050
Blob in, out;
2051
int i;
2052
int bSafe = 0, bFnLint = 0, bText = 0;
2053
db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
2054
bSafe = find_option("safe",0,0)!=0;
2055
bFnLint = find_option("lint-footnotes",0,0)!=0;
2056
if( find_option("dark-pikchr",0,0)!=0 ){
2057
pikchr_to_html_add_flags( PIKCHR_PROCESS_DARK_MODE );
2058
}
2059
bText = find_option("text",0,0)!=0;
2060
verify_all_options();
2061
for(i=2; i<g.argc; i++){
2062
blob_zero(&out);
2063
blob_read_from_file(&in, g.argv[i], ExtFILE);
2064
if( g.argc>3 ){
2065
fossil_print("<!------ %h ------->\n", g.argv[i]);
2066
}
2067
markdown_to_html(&in, 0, &out);
2068
safe_html_context( bSafe ? DOCSRC_UNTRUSTED : DOCSRC_TRUSTED );
2069
safe_html(&out);
2070
if( bText ){
2071
Blob txt;
2072
blob_init(&txt, 0, 0);
2073
html_to_plaintext(blob_str(&out), &txt, HTOT_VT100);
2074
blob_reset(&out);
2075
out = txt;
2076
}
2077
blob_write_to_file(&out, "-");
2078
blob_reset(&in);
2079
blob_reset(&out);
2080
}
2081
if( bFnLint && (g.ftntsIssues[0] || g.ftntsIssues[1]
2082
|| g.ftntsIssues[2] || g.ftntsIssues[3] )){
2083
fossil_fatal("There were issues with footnotes:\n"
2084
" %8d misreference%s\n"
2085
" %8d unreferenced\n"
2086
" %8d split\n"
2087
" %8d overnested",
2088
g.ftntsIssues[0], g.ftntsIssues[0]==1?"":"s",
2089
g.ftntsIssues[1], g.ftntsIssues[2], g.ftntsIssues[3]);
2090
}
2091
}
2092
2093
/*
2094
** Search for a <title>...</title> at the beginning of a wiki page.
2095
** Return true (nonzero) if a title is found. Return zero if there is
2096
** not title.
2097
**
2098
** If a title is found, initialize the pTitle blob to be the content
2099
** of the title and initialize pTail to be the text that follows the
2100
** title.
2101
*/
2102
int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
2103
char *z;
2104
int i;
2105
int iStart;
2106
blob_to_utf8_no_bom(pIn, 0);
2107
z = blob_str(pIn);
2108
for(i=0; fossil_isspace(z[i]); i++){}
2109
if( z[i]!='<' ) return 0;
2110
i++;
2111
if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
2112
for(iStart=i+6; fossil_isspace(z[iStart]); iStart++){}
2113
for(i=iStart; z[i] && (z[i]!='<' || strncmp(&z[i],"</title>",8)!=0); i++){}
2114
if( strncmp(&z[i],"</title>",8)!=0 ){
2115
blob_init(pTitle, 0, 0);
2116
blob_init(pTail, &z[iStart], -1);
2117
return 1;
2118
}
2119
if( i-iStart>0 ){
2120
blob_init(pTitle, &z[iStart], i-iStart);
2121
}else{
2122
blob_init(pTitle, 0, 0);
2123
}
2124
blob_init(pTail, &z[i+8], -1);
2125
return 1;
2126
}
2127
2128
/*
2129
** Parse text looking for wiki hyperlinks in one of the formats:
2130
**
2131
** [target]
2132
** [target|...]
2133
**
2134
** Where "target" can be either an artifact ID prefix or a wiki page
2135
** name. For each such hyperlink found, add an entry to the
2136
** backlink table.
2137
**
2138
** The return value is a mask of RENDER_ flags.
2139
*/
2140
int wiki_extract_links(
2141
char *z, /* The wiki text from which to extract links */
2142
Backlink *pBklnk, /* Backlink extraction context */
2143
int flags /* wiki parsing flags */
2144
){
2145
Renderer renderer;
2146
int tokenType;
2147
ParsedMarkup markup;
2148
int n;
2149
int wikiHtmlOnly = 0;
2150
2151
memset(&renderer, 0, sizeof(renderer));
2152
renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH;
2153
if( wikiUsesHtml() ){
2154
renderer.state |= WIKI_HTMLONLY;
2155
wikiHtmlOnly = 1;
2156
}
2157
2158
while( z[0] ){
2159
if( wikiHtmlOnly ){
2160
n = nextRawToken(z, &renderer, &tokenType);
2161
}else{
2162
n = nextWikiToken(z, &renderer, &tokenType);
2163
}
2164
switch( tokenType ){
2165
case TOKEN_LINK: {
2166
char *zTarget;
2167
int i;
2168
2169
zTarget = &z[1];
2170
for(i=0; zTarget[i] && zTarget[i]!='|' && zTarget[i]!=']'; i++){}
2171
while(i>1 && zTarget[i-1]==' '){ i--; }
2172
backlink_create(pBklnk, zTarget, i);
2173
break;
2174
}
2175
case TOKEN_MARKUP: {
2176
const char *zId;
2177
int iDiv;
2178
parseMarkup(&markup, z);
2179
2180
/* Markup of the form </div id=ID> where there is a matching
2181
** ID somewhere on the stack. Exit the verbatim if were are in
2182
** it. Pop the stack up to the matching <div>. Discard the
2183
** </div>
2184
*/
2185
if( markup.iCode==MARKUP_DIV && markup.endTag &&
2186
(zId = markupId(&markup))!=0 &&
2187
(iDiv = findTagWithId(&renderer, MARKUP_DIV, zId))>=0
2188
){
2189
if( renderer.inVerbatim ){
2190
renderer.inVerbatim = 0;
2191
renderer.state = renderer.preVerbState;
2192
}
2193
while( renderer.nStack>iDiv+1 ) popStack(&renderer);
2194
if( renderer.aStack[iDiv].allowWiki ){
2195
renderer.state |= ALLOW_WIKI;
2196
}else{
2197
renderer.state &= ~ALLOW_WIKI;
2198
}
2199
renderer.nStack--;
2200
}else
2201
2202
/* If within <verbatim id=ID> ignore everything other than
2203
** </verbatim id=ID> and the </dev id=ID2> above.
2204
*/
2205
if( renderer.inVerbatim ){
2206
if( endVerbatim(&renderer, &markup) ){
2207
renderer.inVerbatim = 0;
2208
renderer.state = renderer.preVerbState;
2209
}else{
2210
n = 1;
2211
}
2212
}else
2213
2214
/* Render invalid markup literally. The markup appears in the
2215
** final output as plain text.
2216
*/
2217
if( markup.iCode==MARKUP_INVALID ){
2218
n = 1;
2219
}else
2220
2221
/* If the markup is not font-change markup ignore it if the
2222
** font-change-only flag is set.
2223
*/
2224
if( (markup.iType&MUTYPE_FONT)==0 &&
2225
(renderer.state & FONT_MARKUP_ONLY)!=0 ){
2226
/* Do nothing */
2227
}else
2228
2229
if( markup.iCode==MARKUP_NOWIKI ){
2230
if( markup.endTag ){
2231
renderer.state |= ALLOW_WIKI;
2232
}else{
2233
renderer.state &= ~ALLOW_WIKI;
2234
}
2235
}else
2236
2237
/* Generate end-tags */
2238
if( markup.endTag ){
2239
popStackToTag(&renderer, markup.iCode);
2240
}else
2241
2242
/* Push <div> markup onto the stack together with the id=ID attribute.
2243
*/
2244
if( markup.iCode==MARKUP_DIV ){
2245
pushStackWithId(&renderer, markup.iCode, markupId(&markup),
2246
(renderer.state & ALLOW_WIKI)!=0);
2247
}else
2248
2249
/* Enter <verbatim> processing. With verbatim enabled, all other
2250
** markup other than the corresponding end-tag with the same ID is
2251
** ignored.
2252
*/
2253
if( markup.iCode==MARKUP_VERBATIM ){
2254
int vAttrIdx;
2255
renderer.zVerbatimId = 0;
2256
renderer.inVerbatim = 1;
2257
renderer.preVerbState = renderer.state;
2258
renderer.state &= ~ALLOW_WIKI;
2259
for (vAttrIdx = 0; vAttrIdx < markup.nAttr; vAttrIdx++){
2260
if( markup.aAttr[vAttrIdx].iACode == ATTR_ID ){
2261
renderer.zVerbatimId = markup.aAttr[0].zValue;
2262
}
2263
}
2264
renderer.wantAutoParagraph = 0;
2265
}
2266
2267
/* Restore the input text to its original configuration
2268
*/
2269
unparseMarkup(&markup);
2270
break;
2271
}
2272
default: {
2273
break;
2274
}
2275
}
2276
z += n;
2277
}
2278
free(renderer.aStack);
2279
return renderer.mRender;
2280
}
2281
2282
/*
** Return the length, in bytes, of the HTML token that z is pointing to.
**
** Tokens are one of:
**
**    *  Markup beginning with '<'.  The length comes from
**       html_tag_length(); a '<' that is not valid markup is a
**       one-byte token.
**    *  A run of whitespace.
**    *  An entity: '&', optionally '#', then alphanumerics and an
**       optional terminating ';'.
**    *  A run of ordinary text, ending at the next '<', '&',
**       whitespace, or end of string.
**
** Zero is returned for an empty string, so that a caller never steps
** past the NUL terminator.
*/
int html_token_length(const char *z){
  int n;
  char c = z[0];

  if( c==0 ) return 0;
  if( c=='<' ){
    n = html_tag_length(z);
    if( n<=0 ) n = 1;
  }else if( fossil_isspace(c) ){
    for(n=1; z[n] && fossil_isspace(z[n]); n++){}
  }else if( c=='&' ){
    n = z[1]=='#' ? 2 : 1;
    while( fossil_isalnum(z[n]) ) n++;
    if( z[n]==';' ) n++;
  }else{
    for(n=1; 1; n++){
      /* Every terminator is <= '<', so one compare skips most bytes */
      if( (c = z[n]) > '<' ) continue;
      if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
    }
  }
  return n;
}
2306
2307
/*
** z points to someplace in the middle of HTML markup.  Return the length
** of the subtoken that starts on z.
**
** Subtokens are:
**
**    *  A run of whitespace.
**    *  A quoted string.  The closing quote is included if present;
**       the string also ends (without a closing quote) at '>' or at
**       the end of input.
**    *  A single '='.
**    *  A name: an alphanumeric or '/' followed by any number of
**       alphanumerics, '-' or '_'.
**    *  Any other single character.
**
** Zero is returned at the '>' that ends the markup.  Zero is also
** returned at the NUL terminator, so that loops of the form
** "while( (n = html_subtoken_length(z))>0 ) z += n;" cannot run past
** the end of unterminated markup.
*/
int html_subtoken_length(const char *z){
  int n;
  char c;
  c = z[0];
  if( c==0 || c=='>' ){
    return 0;
  }
  if( fossil_isspace(c) ){
    for(n=1; z[n] && fossil_isspace(z[n]); n++){}
    return n;
  }
  if( c=='"' || c=='\'' ){
    for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
    if( z[n]==c ) n++;
    return n;
  }
  if( c=='=' ){
    return 1;
  }
  if( fossil_isalnum(c) || c=='/' ){
    for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
    return n;
  }
  return 1;
}
2336
2337
/*
** z points to an HTML markup token:  <TAG ATTR=VALUE ...>
** This routine looks for the VALUE associated with zAttr and returns
** a pointer to the start of that value and sets *pLen to be the length
** in bytes for the value.  Or it returns NULL if no such attr exists.
**
** The attribute name is matched without regard to case.  If the value
** is enclosed in a matching pair of single or double quotes, the
** quotes are not part of the returned value.  *pLen is only written
** when a value is found and is never negative.
*/
const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
  int i = 1;
  int n;
  int nAttr;
  int iMatchCnt = 0;   /* 0: nothing yet.  1: saw ATTR.  2: saw ATTR= */
  assert( zMarkup[0]=='<' );
  assert( zMarkup[1]!=0 );
  n = html_subtoken_length(zMarkup+i);     /* Step over the tag name */
  if( n==0 ) return 0;
  i += n;
  nAttr = (int)strlen(zAttr);
  while( 1 ){
    const char *zStart = zMarkup+i;
    n = html_subtoken_length(zStart);
    if( n==0 ) break;
    i += n;
    if( fossil_isspace(zStart[0]) ) continue;
    if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
      iMatchCnt = 1;
    }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
      iMatchCnt = 2;
    }else if( iMatchCnt==2 ){
      /* Strip the quotes only when both an opening and a closing quote
      ** are present.  Requiring n>=2 matters for a lone quote character
      ** (as in <a href=">), where zStart[n-1] and zStart[0] are the
      ** same byte and stripping would yield a length of -1. */
      if( n>=2
       && (zStart[0]=='"' || zStart[0]=='\'')
       && zStart[n-1]==zStart[0]
      ){
        zStart++;
        n -= 2;
      }
      *pLen = n;
      return zStart;
    }else{
      iMatchCnt = 0;
    }
  }
  return 0;
}
2377
2378
/*
2379
** COMMAND: test-html-tokenize
2380
**
2381
** Tokenize an HTML file. Return the offset and length and text of
2382
** each token - one token per line. Omit white-space tokens.
2383
*/
2384
void test_html_tokenize(void){
2385
Blob in;
2386
char *z;
2387
int i;
2388
int iOfst, n;
2389
2390
for(i=2; i<g.argc; i++){
2391
blob_read_from_file(&in, g.argv[i], ExtFILE);
2392
z = blob_str(&in);
2393
for(iOfst=0; z[iOfst]; iOfst+=n){
2394
n = html_token_length(z+iOfst);
2395
if( fossil_isspace(z[iOfst]) ) continue;
2396
fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2397
if( z[iOfst]=='<' && n>1 ){
2398
int j,k;
2399
for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2400
if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2401
fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2402
}
2403
}
2404
}
2405
blob_reset(&in);
2406
}
2407
}
2408
2409
/*
2410
** Attempt to reformat messy HTML to be easily readable by humans.
2411
**
2412
** * Try to keep lines less than 80 characters in length
2413
** * Collapse white space into a single space
2414
** * Put a blank line before:
2415
** <blockquote><center><code><hN><p><pre><table>
2416
** * Put a newline after <br> and <hr>
2417
** * Start each of the following elements on a new line:
2418
** <address><cite><dd><div><dl><dt><li><ol><samp>
2419
** <tbody><td><tfoot><th><thead><tr><ul>
2420
**
2421
** Except, do not do any reformatting inside of <pre>...</pre>
2422
*/
2423
void htmlTidy(const char *zIn, Blob *pOut){
2424
int n;
2425
int nPre = 0;
2426
int iCur = 0;
2427
int wantSpace = 0;
2428
int omitSpace = 1;
2429
while( zIn[0] ){
2430
n = html_token_length(zIn);
2431
if( zIn[0]=='<' && n>1 ){
2432
int i, j;
2433
int isCloseTag;
2434
int eTag;
2435
int eType;
2436
char zTag[32];
2437
isCloseTag = zIn[1]=='/';
2438
for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2439
zTag[i] = fossil_tolower(zIn[j]);
2440
}
2441
zTag[i] = 0;
2442
eTag = findTag(zTag);
2443
eType = aMarkup[eTag].iType;
2444
if( eTag==MARKUP_PRE ){
2445
if( isCloseTag ){
2446
nPre--;
2447
blob_append(pOut, zIn, n);
2448
zIn += n;
2449
if( nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2450
continue;
2451
}else{
2452
if( iCur && nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2453
nPre++;
2454
}
2455
}else if( eType & (MUTYPE_BLOCK|MUTYPE_TABLE) ){
2456
if( !isCloseTag && nPre==0 && blob_size(pOut)>0 ){
2457
blob_append(pOut, "\n\n", 1 + (iCur>0));
2458
iCur = 0;
2459
}
2460
wantSpace = 0;
2461
omitSpace = 1;
2462
}else if( (eType & (MUTYPE_LIST|MUTYPE_LI|MUTYPE_TR|MUTYPE_TD))!=0
2463
|| eTag==MARKUP_HR
2464
){
2465
if( nPre==0 && (!isCloseTag || (eType&MUTYPE_LIST)!=0) && iCur>0 ){
2466
blob_append_char(pOut, '\n');
2467
iCur = 0;
2468
}
2469
wantSpace = 0;
2470
omitSpace = 1;
2471
}
2472
if( wantSpace && nPre==0 ){
2473
if( iCur+n+1>=80 ){
2474
blob_append_char(pOut, '\n');
2475
iCur = 0;
2476
}else{
2477
blob_append_char(pOut, ' ');
2478
iCur++;
2479
}
2480
}
2481
blob_append(pOut, zIn, n);
2482
iCur += n;
2483
wantSpace = 0;
2484
if( eTag==MARKUP_BR || eTag==MARKUP_HR ){
2485
blob_append_char(pOut, '\n');
2486
iCur = 0;
2487
}
2488
}else if( fossil_isspace(zIn[0]) ){
2489
if( nPre ){
2490
blob_append(pOut, zIn, n);
2491
}else{
2492
wantSpace = !omitSpace;
2493
}
2494
}else{
2495
if( wantSpace && nPre==0 ){
2496
if( iCur+n+1>=80 ){
2497
blob_append_char(pOut, '\n');
2498
iCur = 0;
2499
}else{
2500
blob_append_char(pOut, ' ');
2501
iCur++;
2502
}
2503
}
2504
blob_append(pOut, zIn, n);
2505
iCur += n;
2506
wantSpace = omitSpace = 0;
2507
}
2508
zIn += n;
2509
}
2510
if( iCur ) blob_append_char(pOut, '\n');
2511
}
2512
2513
/*
2514
** COMMAND: test-html-tidy
2515
**
2516
** Run the htmlTidy() routine on the content of all files named on
2517
** the command-line and write the results to standard output.
2518
*/
2519
void test_html_tidy(void){
2520
Blob in, out;
2521
int i;
2522
2523
for(i=2; i<g.argc; i++){
2524
blob_read_from_file(&in, g.argv[i], ExtFILE);
2525
blob_zero(&out);
2526
htmlTidy(blob_str(&in), &out);
2527
blob_reset(&in);
2528
fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2529
blob_reset(&out);
2530
}
2531
}
2532
2533
#if INTERFACE
/*
** Allowed flag options for html_to_plaintext().
*/
#define HTOT_VT100  0x01 /* <mark> becomes ^[[91m and </mark> becomes ^[[0m */
#define HTOT_FLOW   0x02 /* Collapse internal whitespace to a single space */
#define HTOT_TRIM   0x04 /* Trim off leading and trailing whitespace */

#endif /* INTERFACE */
2542
2543
/*
2544
** Add <MARK> or </MARK> to the output, or similar VT-100 escape
2545
** codes.
2546
*/
2547
static void addMark(Blob *pOut, ipBlob, zHtml+j, i-jnt isClose){
2548
const char *az[4"\033[91m", "\033[0m" };
2549
int i = 0;
2550
if( isClose ) i++;
2551
if( mFlags & HTOT_VT100 ) i += 2;
2552
blob_append(pOut, az[i], -1);
2553
}
2554
2555
/*
2556
** Remove all HTML markup from the input text. The output written into
2557
** pOut is pure text.
2558
**
2559
** Put the title on the first line, if there is any <title> markup.
2560
** If there is no <title>, then create a blank first line.
2561
*/
2562
void html_to_plaintext(const char *zIn, Blob *pOut, int mFlags){
2563
int n;
2564
int i, j;
2565
int bFlow = 0; /* Transform internal WS into a single space */
2566
int prevWS = 1; /* Previous output was whitespace or start of msg */
2567
int nMark = 0; /* True if inside of <mark>..</mark> */
2568
2569
for(i=0; fossil_isspace(zIn[i]); i++){}
2570
if( i>0 && (mFlags & HTOT_TRIM)==0 ){
2571
blob_append(pOut, zIn, i);
2572
}
2573
zIn += i;
2574
if( mFlags & HTOT_FLOW ) bFlow = 1;
2575
while( zIn[0] ){
2576
n = html_token_length(zIn);
2577
if( zIn[0]=='<' && n>1 ){
2578
int isCloseTag;
2579
int eTag;
2580
int eType;
2581
char zTag[32];
2582
prevWS = 0;
2583
isCloseTag = zIn[1]=='/';
2584
for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2585
zTag[i] = fossil_tolower(zIn[j]);
2586
}
2587
zTag[i] = 0;
2588
eTag = findTag(zTag);
2589
eType = aMarkup[eTag].iType;
2590
if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2591
zIn += n;
2592
while( zIn[0] ){
2593
n = html_token_length(zIn);
2594
if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2595
zIn += n;
2596
}
2597
if( zIn[0]=='<' ) zIn += n;
2598
continue;
2599
}
2600
if( eTag==MARKUP_INVALID && strcmp(zTag,"mark")==0 ){
2601
if( isCloseTag && nMark ){
2602
addMark(pOut, mFlags, 1);
2603
nMark = 0;
2604
}else if( !isCloseTag && !nMark ){
2605
addMark(pOut, mFlags, 0);
2606
nMark = 1;
2607
}
2608
zIn += n;
2609
continue;
2610
}
2611
if( eTag==MARKUP_TITLE ){
2612
if( isCloseTag && (mFlags & HTOT_FLOW)==0 ){
2613
bFlow = 0;
2614
}else{
2615
bFlow = 1;
2616
}
2617
}
2618
if( !isCloseTag && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2619
blob_append_char(pOut, '\n');
2620
}
2621
}else if( fossil_isspace(zIn[0]) ){
2622
if( bFlow==0 ){
2623
if( zIn[n]==0 && (mFlags & HTOT_TRIM) ) break;
2624
blob_append(pOut, zIn, n);
2625
}else if( !prevWS ){
2626
prevWS = 1;
2627
blob_append_char(pOut, ' ');
2628
zIn += n;
2629
n = 0;
2630
}
2631
}else if( zIn[0]=='&' ){
2632
u32 c = '?';
2633
prevWS = 0;
2634
if( zIn[1]=='#' ){
2635
c = atoi(&zIn[2]);
2636
if( c==0 ) c = '?';
2637
}else{
2638
static const struct { int n; u32 c; char *z; } aEntity[] = {
2639
{ 5, '&', "&amp;" },
2640
{ 4, '<', "&lt;" },
2641
{ 4, '>', "&gt;" },
2642
{ 6, ' ', "&nbsp;" },
2643
{ 6, '"', "&quot;" },
2644
};
2645
int jj;
2646
for(jj=0; jj<count(aEntity); jj++){
2647
if( aEntity[jj].n==n && strncmp(aEntity[jj].z,zIn,n)==0 ){
2648
c = aEntity[jj].c;
2649
break;
2650
}
2651
}
2652
}
2653
if( c<0x00080 ){
2654
blob_append_char(pOut, c & 0xff);
2655
}else if( c<0x00800 ){
2656
blob_append_char(pOut, 0xc0 + (u8)((c>>6)&0x1f));
2657
blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2658
}else if( c<0x10000 ){
2659
blob_append_char(pOut, 0xe0 + (u8)((c>>12)&0x0f));
2660
blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2661
blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2662
}else{
2663
blob_append_char(pOut, 0xf0 + (u8)((c>>18)&0x07));
2664
blob_append_char(pOut, 0x80 + (u8)((c>>12)&0x3f));
2665
blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2666
blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2667
}
2668
}else{
2669
prevWS = 0;
2670
blob_append(pOut, zIn, n);
2671
}
2672
zIn += n;
2673
}
2674
if( nMark ){
2675
addMark(pOut, mFlags, 1);
2676
}
2677
}
2678
2679
/*
2680
** COMMAND: test-html-to-text
2681
**
2682
** Usage: %fossil test-html-to-text [OPTIONS] FILE ...
2683
**
2684
** Read all files named on the command-line. Convert the file
2685
** content from HTML to text and write the results on standard
2686
** output.
2687
**
2688
** This command is intended as a test and debug interface for
2689
** the html_to_plaintext() routine.
2690
**
2691
** Options:
2692
**
2693
** --vt100 Translate <mark> and </mark> into ANSI/VT100
2694
** escapes to highlight the contained text.
2695
*/
2696
void test_html_to_text(void){
2697
Blob in, out;
2698
int i;
2699
int mFlags = 0;
2700
if( find_option("vt100",0,0)!=0 ) mFlags |= HTOT_VT100;
2701
2702
for(i=2; i<g.argc; i++){
2703
blob_read_from_file(&in, g.argv[i], ExtFILE);
2704
blob_zero(&out);
2705
html_to_plaintext(blob_str(&in), &out, mFlags);
2706
blob_reset(&in);
2707
fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2708
blob_reset(&out);
2709
}
2710
}
2711
2712
/****************************************************************************
2713
** safe-html:
2714
**
2715
** An interface for preventing HTML constructs (ex: <style>, <form>, etc)
2716
** from being inserted into Wiki and Forum posts using Markdown. See the
2717
** comment on safe_html_append() for additional information on what is meant
2718
** by "safe".
2719
**
2720
** The safe-html restrictions only apply to Markdown, as Fossil-Wiki only
2721
** allows safe-html by design - unsafe-HTML is never and has never been
2722
** allowed in Fossil-Wiki.
2723
**
2724
** This code is in the wikiformat.c file so that it can have access to the
2725
** white-list of acceptable HTML in the aMarkup[] array.
2726
*/
2727
2728
/*
** A stack of the HTML elements that are currently open, as maintained
** by safe_html_append().  Each entry is an integer tag code.
**
** The stack starts out using the aSpace[] array embedded in the object
** and only moves to heap memory if it outgrows that array.
*/
typedef struct HtmlTagStack HtmlTagStack;
struct HtmlTagStack {
  int n;              /* Number of entries currently on the stack */
  int nAlloc;         /* Heap slots in aStack[], or 0 if using aSpace[] */
  int *aStack;        /* Stack content: either aSpace[] or heap memory */
  int aSpace[10];     /* Built-in storage, so shallow stacks need no malloc */
};
2739
2740
/*
2741
** Initialize bulk memory to a valid empty tagstack.
2742
*/
2743
static void html_tagstack_init(HtmlTagStack *p){
2744
p->n = 0;
2745
p->nAlloc = 0;
2746
p->aStack = p->aSpace;
2747
}
2748
2749
/*
2750
** Push a new element onto the tag stack
2751
*/
2752
static void html_tagstack_push(HtmlTagStack *p, int e){
2753
if( p->n>=ArraySize(p->aSpace) && p->n>=p->nAlloc ){
2754
if( p->nAlloc==0 ){
2755
int *aNew;
2756
p->nAlloc = 50;
2757
aNew = fossil_malloc( sizeof(p->aStack[0])*p->nAlloc );
2758
memcpy(aNew, p->aStack, sizeof(p->aStack[0])*p->n );
2759
p->aStack = aNew;
2760
}else{
2761
p->nAlloc *= 2;
2762
p->aStack = fossil_realloc(p->aStack, sizeof(p->aStack[0])*p->nAlloc );
2763
}
2764
}
2765
p->aStack[p->n++] = e;
2766
}
2767
2768
/*
2769
** Clear a tag stack, reclaiming any memory allocations.
2770
*/
2771
static void html_tagstack_clear(HtmlTagStack *p){
2772
if( p->nAlloc ){
2773
fossil_free(p->aStack);
2774
p->nAlloc = 0;
2775
p->aStack = p->aSpace;
2776
}
2777
p->n = 0;
2778
}
2779
2780
/*
2781
** The HTML end-tag eEnd wants to be added to pBlob.
2782
**
2783
** If an open-tag for eEnd exists anywhere on the stack, then
2784
** pop it and all prior elements from the task, issuing appropriate
2785
** end-tags as you go.
2786
**
2787
** If there is no open-tag for eEnd on the stack, then this
2788
** routine is a no-op.
2789
*/
2790
static void html_tagstack_pop(HtmlTagStack *p, Blob *pBlob, int eEnd){
2791
int i, e;
2792
if( eEnd!=0 ){
2793
for(i=p->n-1; i>=0 && p->aStack[i]!=eEnd; i--){}
2794
if( i<0 ){
2795
blob_appendf(pBlob, "<span class='error'>&lt;/%s&gt;</span>",
2796
aMarkup[eEnd].zName);
2797
return;
2798
}
2799
}else if( p->n==0 ){
2800
return;
2801
}
2802
do{
2803
e = p->aStack[--p->n];
2804
if( e==eEnd || (aMarkup[e].iType & MUTYPE_Nested)!=0 ){
2805
blob_appendf(pBlob, "</%s>", aMarkup[e].zName);
2806
}
2807
}while( e!=eEnd && p->n>0 );
2808
}
2809
2810
/*
** Return the nonce that tells safe_html() to let a span of HTML
** through without censoring.
**
** When safe_html() sanitizes HTML, it passes any text that lies between
** two consecutive instances of the nonce through unchanged.  The nonce
** is itself an HTML comment, so leaving it in the HTML stream is
** harmless.  The nonce is built from 32 random bytes, so an attacker
** is unable to guess it in advance.
**
** The nonce is created the first time this routine is called with a
** non-zero bGenerate and the same string is returned from then on.
** If no nonce exists yet and bGenerate is zero, NULL is returned.
**
** The original use-case for this mechanism is to allow Pikchr-generated
** SVG in the middle of HTML generated from Markdown.  The Markdown
** output is normally processed by safe_html() to prevent accidental
** or malicious introduction of harmful HTML (ex: <script>).  The
** safe_html() routine only lets through HTML elements that are on its
** allow-list, and SVG is not on that list.  Hence the Pikchr-generated
** SVG must be surrounded by the nonce in order to get through.
*/
const char *safe_html_nonce(int bGenerate){
  static char *zCached = 0;
  if( zCached!=0 ) return zCached;
  if( !bGenerate ) return 0;
  zCached = db_text(0, "SELECT '<!--'||hex(randomblob(32))||'-->';");
  return zCached;
}
/* Size of the nonce in bytes:  "<!--", 64 hex digits, and "-->" */
#define SAFE_NONCE_SIZE (4+64+3)
2838
2839
/*
2840
** Append a safe translation of HTML text to a Blob object.
2841
**
2842
** Restriction: The input to this routine must be writable.
2843
* Temporary changes may be made to the input, but the input is restored
2844
** to its original state prior to returning. If zHtml[nHtml] is not a
2845
** zero character, then a zero might be written in that position
2846
** temporarily, but that slot will also be restored before this routine
2847
** returns.
2848
*/
2849
static void safe_html_append(Blob *pBlob, char *zHtml, int nHtml){
2850
char cLast;
2851
int i, j, n;
2852
HtmlTagStack s;
2853
ParsedMarkup markup;
2854
const char *zNonce;
2855
char *z;
2856
2857
if( nHtml<=0 ) return;
2858
cLast = zHtml[nHtml];
2859
zHtml[nHtml] = 0;
2860
html_tagstack_init(&s);
2861
2862
i = 0;
2863
while( i<nHtml ){
2864
if( zHtml[i]=='<' ){
2865
j = i;
2866
}else{
2867
z = strchr(zHtml+i, '<');
2868
if( z==0 ){
2869
blob_append(pBlob, zHtml+i, nHtml-i);
2870
break;
2871
}
2872
j = (int)(z - zHtml);
2873
blob_append(pBlob, zHtml+i, j-i);
2874
}
2875
if( zHtml[j+1]=='!'
2876
&& j+2*SAFE_NONCE_SIZE<nHtml
2877
&& (zNonce = safe_html_nonce(0))!=0
2878
&& strncmp(zHtml+j,zNonce,SAFE_NONCE_SIZE)==0
2879
&& (z = strstr(zHtml+j+SAFE_NONCE_SIZE,zNonce))!=0
2880
){
2881
i = (int)(z - zHtml) + SAFE_NONCE_SIZE;
2882
blob_append(pBlob, zHtml+j, i-j);
2883
continue;
2884
}
2885
n = html_tag_length(zHtml+j);
2886
if( n==0 ){
2887
blob_append(pBlob, "&lt;", 4);
2888
i = j+1;
2889
continue;
2890
}else{
2891
i = j + n;
2892
}
2893
parseMarkup(&markup, zHtml+j);
2894
if( markup.iCode==MARKUP_INVALID ){
2895
unparseMarkup(&markup);
2896
blob_appendf(pBlob, "<span class='error'>&lt;%.*s&gt;</span>",
2897
n-2, zHtml+j+1);
2898
continue;
2899
}
2900
if( (markup.iType & MUTYPE_Nested)==0 || markup.iCode==MARKUP_P ){
2901
renderMarkup(pBlob, &markup);
2902
}else{
2903
if( markup.endTag ){
2904
html_tagstack_pop(&s, pBlob, markup.iCode);
2905
}else{
2906
renderMarkup(pBlob, &markup);
2907
html_tagstack_push(&s, markup.iCode);
2908
}
2909
}
2910
unparseMarkup(&markup);
2911
}
2912
html_tagstack_pop(&s, pBlob, 0);
2913
html_tagstack_clear(&s);
2914
zHtml[nHtml] = cLast;
2915
}
2916
2917
/*
** True if safe_html() is enabled, which is to say if the output of
** Markdown should be restricted to use only "safe" HTML.  This is
** the default.  The value is changed by safe_html_context().
*/
static int safeHtmlEnable = 1;
2923
2924
2925
#if INTERFACE
/*
** Allowed values for the eTrust parameter to safe_html_context().
** Each value names the source of the document being rendered.
*/
#define DOCSRC_FILE      1     /* Document is a checked-in file */
#define DOCSRC_FORUM     2     /* Document is a forum post */
#define DOCSRC_TICKET    3     /* Document is a ticket comment */
#define DOCSRC_WIKI      4     /* Document is a wiki page */
#define DOCSRC_TRUSTED   5     /* safe_html() is always a no-op */
#define DOCSRC_UNTRUSTED 6     /* safe_html() is always enabled */
#endif /* INTERFACE */
2936
2937
2938
/*
2939
** Specify the context in which a markdown document with potentially
2940
** unsafe HTML will be rendered.
2941
*/
2942
void safe_html_context(int eTrust){
2943
static const char *zSafeHtmlSetting = 0;
2944
char cPerm = 0;
2945
if( eTrust==DOCSRC_TRUSTED ){
2946
safeHtmlEnable = 0;
2947
return;
2948
}
2949
if( eTrust==DOCSRC_UNTRUSTED ){
2950
safeHtmlEnable = 1;
2951
return;
2952
}
2953
if( zSafeHtmlSetting==0 ){
2954
zSafeHtmlSetting = db_get("safe-html", "");
2955
}
2956
switch( eTrust ){
2957
case DOCSRC_FILE: cPerm = 'b'; break;
2958
case DOCSRC_FORUM: cPerm = 'f'; break;
2959
case DOCSRC_TICKET: cPerm = 't'; break;
2960
case DOCSRC_WIKI: cPerm = 'w'; break;
2961
}
2962
safeHtmlEnable = (strchr(zSafeHtmlSetting,cPerm)==0);
2963
}
2964
2965
/*
2966
** SETTING: safe-html width=8
2967
** This setting controls whether or not unsafe HTML elements
2968
** (such as SCRIPT or STYLE tags) are allowed in Markdown-formatted
2969
** documents. Unsafe HTML is disabled by default. If this setting
2970
** exists and is a string, then letters in that string can enable
2971
** unsafe HTML in various contexts:
2972
**
2973
** - b Unsafe HTML allowed in embedded documentation
2974
** - f Unsafe HTML allowed in forum posts
2975
** - t Unsafe HTML allowed in tickets
2976
** - w Unsafe HTML allowed on wiki pages
2977
*/
2978
/*
2979
** The input blob contains HTML. If safe-html is enabled, then
2980
** convert the input into "safe HTML". The following modifications
2981
** are made:
2982
**
2983
** 1. Remove any elements that are not on the AllowedMarkup list.
2984
** (ex: <script>, <form>, etc.)
2985
**
2986
** 2. Remove any attributes that are not on the AllowedMarkup list.
2987
** (ex: onload=, etc.)
2988
**
2989
** 3. Omit any surplus close-tags. This prevents the script from
2990
** terminating an <div> or similar in the outer context.
2991
**
2992
** 4. Insert additional close-tags as necessary so that any
2993
** tag in the input that needs a close-tag has one. This
2994
** prevents tags in the embedded script from affecting the
2995
** display of content that follows this script in the

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button