| | @@ -123,58 +123,58 @@ |
| 123 | 123 | ** Except for MARKUP_INVALID, this must all be in alphabetical order |
| 124 | 124 | ** and in numerical sequence. The first markup type must be zero. |
| 125 | 125 | ** The value for MARKUP_XYZ must correspond to the <xyz> entry |
| 126 | 126 | ** in aAllowedMarkup[]. |
| 127 | 127 | */ |
| 128 | | -#define MARKUP_INVALID 255 |
| 129 | | -#define MARKUP_A 0 |
| 130 | | -#define MARKUP_ADDRESS 1 |
| 131 | | -#define MARKUP_B 2 |
| 132 | | -#define MARKUP_BIG 3 |
| 133 | | -#define MARKUP_BLOCKQUOTE 4 |
| 134 | | -#define MARKUP_BR 5 |
| 135 | | -#define MARKUP_CENTER 6 |
| 136 | | -#define MARKUP_CITE 7 |
| 137 | | -#define MARKUP_CODE 8 |
| 138 | | -#define MARKUP_DD 9 |
| 139 | | -#define MARKUP_DFN 10 |
| 140 | | -#define MARKUP_DL 11 |
| 141 | | -#define MARKUP_DT 12 |
| 142 | | -#define MARKUP_EM 13 |
| 143 | | -#define MARKUP_FONT 14 |
| 144 | | -#define MARKUP_H1 15 |
| 145 | | -#define MARKUP_H2 16 |
| 146 | | -#define MARKUP_H3 17 |
| 147 | | -#define MARKUP_H4 18 |
| 148 | | -#define MARKUP_H5 19 |
| 149 | | -#define MARKUP_H6 20 |
| 150 | | -#define MARKUP_HR 21 |
| 151 | | -#define MARKUP_IMG 22 |
| 152 | | -#define MARKUP_I 23 |
| 153 | | -#define MARKUP_KBD 24 |
| 154 | | -#define MARKUP_LI 25 |
| 155 | | -#define MARKUP_NOBR 26 |
| 156 | | -#define MARKUP_NOWIKI 27 |
| 157 | | -#define MARKUP_OL 28 |
| 158 | | -#define MARKUP_P 29 |
| 159 | | -#define MARKUP_PRE 30 |
| 160 | | -#define MARKUP_S 31 |
| 161 | | -#define MARKUP_SAMP 32 |
| 162 | | -#define MARKUP_SMALL 33 |
| 163 | | -#define MARKUP_STRIKE 34 |
| 164 | | -#define MARKUP_STRONG 35 |
| 165 | | -#define MARKUP_SUB 36 |
| 166 | | -#define MARKUP_SUP 37 |
| 167 | | -#define MARKUP_TABLE 38 |
| 168 | | -#define MARKUP_TD 39 |
| 169 | | -#define MARKUP_TH 40 |
| 170 | | -#define MARKUP_TR 41 |
| 171 | | -#define MARKUP_TT 42 |
| 172 | | -#define MARKUP_U 43 |
| 173 | | -#define MARKUP_UL 44 |
| 174 | | -#define MARKUP_VAR 45 |
| 175 | | -#define MARKUP_VERBATIM 46 |
| 128 | +#define MARKUP_INVALID 0 |
| 129 | +#define MARKUP_A 1 |
| 130 | +#define MARKUP_ADDRESS 2 |
| 131 | +#define MARKUP_B 3 |
| 132 | +#define MARKUP_BIG 4 |
| 133 | +#define MARKUP_BLOCKQUOTE 5 |
| 134 | +#define MARKUP_BR 6 |
| 135 | +#define MARKUP_CENTER 7 |
| 136 | +#define MARKUP_CITE 8 |
| 137 | +#define MARKUP_CODE 9 |
| 138 | +#define MARKUP_DD 10 |
| 139 | +#define MARKUP_DFN 11 |
| 140 | +#define MARKUP_DL 12 |
| 141 | +#define MARKUP_DT 13 |
| 142 | +#define MARKUP_EM 14 |
| 143 | +#define MARKUP_FONT 15 |
| 144 | +#define MARKUP_H1 16 |
| 145 | +#define MARKUP_H2 17 |
| 146 | +#define MARKUP_H3 18 |
| 147 | +#define MARKUP_H4 19 |
| 148 | +#define MARKUP_H5 20 |
| 149 | +#define MARKUP_H6 21 |
| 150 | +#define MARKUP_HR 22 |
| 151 | +#define MARKUP_IMG 23 |
| 152 | +#define MARKUP_I 24 |
| 153 | +#define MARKUP_KBD 25 |
| 154 | +#define MARKUP_LI 26 |
| 155 | +#define MARKUP_NOBR 27 |
| 156 | +#define MARKUP_NOWIKI 28 |
| 157 | +#define MARKUP_OL 29 |
| 158 | +#define MARKUP_P 30 |
| 159 | +#define MARKUP_PRE 31 |
| 160 | +#define MARKUP_S 32 |
| 161 | +#define MARKUP_SAMP 33 |
| 162 | +#define MARKUP_SMALL 34 |
| 163 | +#define MARKUP_STRIKE 35 |
| 164 | +#define MARKUP_STRONG 36 |
| 165 | +#define MARKUP_SUB 37 |
| 166 | +#define MARKUP_SUP 38 |
| 167 | +#define MARKUP_TABLE 39 |
| 168 | +#define MARKUP_TD 40 |
| 169 | +#define MARKUP_TH 41 |
| 170 | +#define MARKUP_TR 42 |
| 171 | +#define MARKUP_TT 43 |
| 172 | +#define MARKUP_U 44 |
| 173 | +#define MARKUP_UL 45 |
| 174 | +#define MARKUP_VAR 46 |
| 175 | +#define MARKUP_VERBATIM 47 |
| 176 | 176 | |
| 177 | 177 | /* |
| 178 | 178 | ** The various markup is divided into the following types: |
| 179 | 179 | */ |
| 180 | 180 | #define MUTYPE_SINGLE 0x0001 /* <img>, <br>, or <hr> */ |
| | @@ -195,10 +195,11 @@ |
| 195 | 195 | const char *zName; /* Name of the markup */ |
| 196 | 196 | char iCode; /* The MARKUP_* code */ |
| 197 | 197 | short int iType; /* The MUTYPE_* code */ |
| 198 | 198 | int allowedAttr; /* Allowed attributes on this markup */ |
| 199 | 199 | } aMarkup[] = { |
| 200 | + { 0, MARKUP_INVALID, 0, 0 }, |
| 200 | 201 | { "a", MARKUP_A, MUTYPE_HYPERLINK, ATTR_HREF }, |
| 201 | 202 | { "address", MARKUP_ADDRESS, MUTYPE_BLOCK, 0 }, |
| 202 | 203 | { "b", MARKUP_B, MUTYPE_FONT, 0 }, |
| 203 | 204 | { "big", MARKUP_BIG, MUTYPE_FONT, 0 }, |
| 204 | 205 | { "blockquote", MARKUP_BLOCKQUOTE, MUTYPE_BLOCK, 0 }, |
| | @@ -263,11 +264,11 @@ |
| 263 | 264 | /* |
| 264 | 265 | ** Use binary search to locate a tag in the aMarkup[] table. |
| 265 | 266 | */ |
| 266 | 267 | static int findTag(const char *z){ |
| 267 | 268 | int i, c, first, last; |
| 268 | | - first = 0; |
| 269 | + first = 1; |
| 269 | 270 | last = sizeof(aMarkup)/sizeof(aMarkup[0]) - 1; |
| 270 | 271 | while( first<=last ){ |
| 271 | 272 | i = (first+last)/2; |
| 272 | 273 | c = strcmp(aMarkup[i].zName, z); |
| 273 | 274 | if( c==0 ){ |
| | @@ -300,11 +301,28 @@ |
| 300 | 301 | */ |
| 301 | 302 | #define AT_NEWLINE 0x001 /* At start of a line */ |
| 302 | 303 | #define AT_PARAGRAPH 0x002 /* At start of a paragraph */ |
| 303 | 304 | #define ALLOW_WIKI 0x004 /* Allow wiki markup */ |
| 304 | 305 | #define FONT_MARKUP_ONLY 0x008 /* Only allow MUTYPE_FONT markup */ |
| 305 | | -#define IN_LIST 0x010 /* Within <ul> */ |
| 306 | +#define IN_LIST 0x010 /* Within wiki <ul> or <ol> */ |
| 307 | + |
| 308 | +/* |
| 309 | +** Current state of the rendering engine |
| 310 | +*/ |
| 311 | +typedef struct Renderer Renderer; |
| 312 | +struct Renderer { |
| 313 | + Blob *pOut; /* Output appended to this blob */ |
| 314 | + int state; /* Flags that govern rendering */ |
| 315 | + int wikiList; /* Current wiki list type */ |
| 316 | + int inVerbatim; /* True in <verbatim> mode */ |
| 317 | + int preVerbState; /* Value of state prior to verbatim */ |
| 318 | + const char *zVerbatimId; /* The id= attribute of <verbatim> */ |
| 319 | + int nStack; /* Number of elements on the stack */ |
| 320 | + int nAlloc; /* Space allocated for aStack */ |
| 321 | + unsigned char *aStack; /* Open markup stack */ |
| 322 | +}; |
| 323 | + |
| 306 | 324 | |
| 307 | 325 | /* |
| 308 | 326 | ** z points to a "<" character. Check to see if this is the start of |
| 309 | 327 | ** a valid markup. If it is, return the total number of characters in |
| 310 | 328 | ** the markup including the initial "<" and the terminating ">". If |
| | @@ -410,10 +428,46 @@ |
| 410 | 428 | n++; |
| 411 | 429 | } |
| 412 | 430 | if( i<2 || isspace(z[n]) ) return 0; |
| 413 | 431 | return n; |
| 414 | 432 | } |
| 433 | + |
| 434 | +/* |
| 435 | +** Check to see if the z[] string is the beginning of an enumeration value. |
| 436 | +** If it is, return the length of the bullet text. Otherwise return 0. |
| 437 | +** |
| 438 | +** Syntax: |
| 439 | +** * a tab or two or more spaces |
| 440 | +** * one or more digits |
| 441 | +** * optional "." |
| 442 | +** * another tab or two or more additional spaces |
| 443 | +** |
| 444 | +*/ |
| 445 | +static int enumLength(const char *z){ |
| 446 | + int i, n; |
| 447 | + n = 0; |
| 448 | + i = 0; |
| 449 | + while( z[n]==' ' || z[n]=='\t' ){ |
| 450 | + if( z[n]=='\t' ) i++; |
| 451 | + i++; |
| 452 | + n++; |
| 453 | + } |
| 454 | + if( i<2 ) return 0; |
| 455 | + for(i=0; isdigit(z[n]); i++, n++){} |
| 456 | + if( i==0 ) return 0; |
| 457 | + if( z[n]=='.' ){ |
| 458 | + n++; |
| 459 | + } |
| 460 | + i = 0; |
| 461 | + while( z[n]==' ' || z[n]=='\t' ){ |
| 462 | + if( z[n]=='\t' ) i++; |
| 463 | + i++; |
| 464 | + n++; |
| 465 | + } |
| 466 | + if( i<2 || isspace(z[n]) ) return 0; |
| 467 | + return n; |
| 468 | +} |
| 415 | 469 | |
| 416 | 470 | /* |
| 417 | 471 | ** Check to see if the z[] string is the beginning of an indented |
| 418 | 472 | ** paragraph. If it is, return the length of the indent. Otherwise |
| 419 | 473 | ** return 0. |
| | @@ -483,17 +537,15 @@ |
| 483 | 537 | n = bulletLength(z); |
| 484 | 538 | if( n>0 ){ |
| 485 | 539 | *pTokenType = TOKEN_BULLET; |
| 486 | 540 | return n; |
| 487 | 541 | } |
| 488 | | -#if 0 |
| 489 | 542 | n = enumLength(z); |
| 490 | 543 | if( n>0 ){ |
| 491 | 544 | *pTokenType = TOKEN_ENUM; |
| 492 | 545 | return n; |
| 493 | 546 | } |
| 494 | | -#endif |
| 495 | 547 | } |
| 496 | 548 | if( (state & AT_PARAGRAPH)!=0 && isspace(z[0]) ){ |
| 497 | 549 | n = indentLength(z); |
| 498 | 550 | if( n>0 ){ |
| 499 | 551 | *pTokenType = TOKEN_INDENT; |
| | @@ -628,25 +680,10 @@ |
| 628 | 680 | n = strlen(z); |
| 629 | 681 | z[n] = p->aAttr[i].cTerm; |
| 630 | 682 | } |
| 631 | 683 | } |
| 632 | 684 | |
| 633 | | -/* |
| 634 | | -** Current state of the rendering engine |
| 635 | | -*/ |
| 636 | | -typedef struct Renderer Renderer; |
| 637 | | -struct Renderer { |
| 638 | | - Blob *pOut; /* Output appended to this blob */ |
| 639 | | - int state; /* Flag that govern rendering */ |
| 640 | | - int inVerbatim; /* True in <verbatim> mode */ |
| 641 | | - int preVerbState; /* Value of state prior to verbatim */ |
| 642 | | - const char *zVerbatimId; /* The id= attribute of <verbatim> */ |
| 643 | | - int nStack; /* Number of elements on the stack */ |
| 644 | | - int nAlloc; /* Space allocated for aStack */ |
| 645 | | - unsigned char *aStack; /* Open markup stack */ |
| 646 | | -}; |
| 647 | | - |
| 648 | 685 | /* |
| 649 | 686 | ** Pop a single element off of the stack. As the element is popped, |
| 650 | 687 | ** output its end tag. |
| 651 | 688 | */ |
| 652 | 689 | static void popStack(Renderer *p){ |
| | @@ -686,23 +723,24 @@ |
| 686 | 723 | } |
| 687 | 724 | |
| 688 | 725 | /* |
| 689 | 726 | ** Pop the stack until the top-most element of the stack |
| 690 | 727 | ** is an element that matches the type in iMask. Return |
| 691 | | -** true on success. If the stack does not have an element |
| 728 | +** the code of the markup element that is left on top of the stack. |
| 729 | +** If the stack does not have an element |
| 692 | 730 | ** that matches iMask, then leave the stack unchanged and |
| 693 | | -** return false. |
| 731 | +** return false (MARKUP_INVALID). |
| 694 | 732 | */ |
| 695 | 733 | static int backupToType(Renderer *p, int iMask){ |
| 696 | 734 | int i; |
| 697 | 735 | for(i=p->nStack-1; i>=0 && (aMarkup[p->aStack[i]].iType&iMask)==0; i--){} |
| 698 | 736 | if( i<0 ) return 0; |
| 699 | 737 | i++; |
| 700 | 738 | while( p->nStack>i ){ |
| 701 | 739 | popStack(p); |
| 702 | 740 | } |
| 703 | | - return 1; |
| 741 | + return p->aStack[i-1]; |
| 704 | 742 | } |
| 705 | 743 | |
| 706 | 744 | /* |
| 707 | 745 | ** Add missing markup in preparation for writing text. |
| 708 | 746 | ** |
| | @@ -767,10 +805,14 @@ |
| 767 | 805 | while( z[0] ){ |
| 768 | 806 | n = nextToken(z, p->state, &tokenType); |
| 769 | 807 | p->state &= ~(AT_NEWLINE|AT_PARAGRAPH); |
| 770 | 808 | switch( tokenType ){ |
| 771 | 809 | case TOKEN_PARAGRAPH: { |
| 810 | + if( p->wikiList ){ |
| 811 | + popStackToTag(p, p->wikiList); |
| 812 | + p->wikiList = 0; |
| 813 | + } |
| 772 | 814 | blob_append(p->pOut, "\n\n<p>", -1); |
| 773 | 815 | p->state |= AT_PARAGRAPH|AT_NEWLINE; |
| 774 | 816 | popStackToTag(p, MARKUP_P); |
| 775 | 817 | break; |
| 776 | 818 | } |
| | @@ -778,17 +820,41 @@ |
| 778 | 820 | blob_append(p->pOut, "\n", 1); |
| 779 | 821 | p->state |= AT_NEWLINE; |
| 780 | 822 | break; |
| 781 | 823 | } |
| 782 | 824 | case TOKEN_BULLET: { |
| 783 | | - if( backupToType(p, MUTYPE_LIST)==0 ){ |
| 825 | + if( p->wikiList!=MARKUP_UL ){ |
| 826 | + if( p->wikiList ){ |
| 827 | + popStackToTag(p, p->wikiList); |
| 828 | + } |
| 784 | 829 | pushStack(p, MARKUP_UL); |
| 785 | 830 | blob_append(p->pOut, "<ul>", 4); |
| 831 | + p->wikiList = MARKUP_UL; |
| 786 | 832 | } |
| 787 | 833 | pushStack(p, MARKUP_LI); |
| 788 | 834 | blob_append(p->pOut, "<li>", 4); |
| 789 | 835 | break; |
| 836 | + } |
| 837 | + case TOKEN_ENUM: { |
| 838 | + if( p->wikiList!=MARKUP_OL ){ |
| 839 | + if( p->wikiList ){ |
| 840 | + popStackToTag(p, p->wikiList); |
| 841 | + } |
| 842 | + pushStack(p, MARKUP_OL); |
| 843 | + blob_append(p->pOut, "<ol>", 4); |
| 844 | + p->wikiList = MARKUP_OL; |
| 845 | + } |
| 846 | + pushStack(p, MARKUP_LI); |
| 847 | + blob_appendf(p->pOut, "<li value=\"%d\">", atoi(z)); |
| 848 | + break; |
| 849 | + } |
| 850 | + case TOKEN_INDENT: { |
| 851 | + assert( p->wikiList==0 ); |
| 852 | + pushStack(p, MARKUP_BLOCKQUOTE); |
| 853 | + blob_append(p->pOut, "<blockquote>", -1); |
| 854 | + p->wikiList = MARKUP_BLOCKQUOTE; |
| 855 | + break; |
| 790 | 856 | } |
| 791 | 857 | case TOKEN_CHARACTER: { |
| 792 | 858 | if( z[0]=='<' ){ |
| 793 | 859 | blob_append(p->pOut, "<", 4); |
| 794 | 860 | }else if( z[0]=='&' ){ |
| | @@ -904,33 +970,24 @@ |
| 904 | 970 | |
| 905 | 971 | /* |
| 906 | 972 | ** Transform the text in the pIn blob. Write the results |
| 907 | 973 | ** into the pOut blob. The pOut blob should already be |
| 908 | 974 | ** initialized. The output is merely appended to pOut. |
| 909 | | -** |
| 910 | | -** The transformations carried out depend on the ops flag: |
| 911 | | -** |
| 912 | | -** WIKI_NOFOLLOW |
| 913 | | -** |
| 914 | | -** * Add the nofollow attribute to external links |
| 915 | | -** |
| 916 | | -** WIKI_HTML |
| 917 | | -** |
| 918 | | -** * Convert wiki into HTML |
| 919 | | -** * Remove <nowiki> and <verbatium> |
| 920 | | -** * Convert & into & |
| 921 | | -** * Unrecognized markup and markup within <verbatim> |
| 922 | | -** is converted into <...> |
| 923 | | -** * Unauthorized attributes on markup are removed |
| 975 | +** If pOut is NULL, then the output is appended to the CGI |
| 976 | +** reply. |
| 924 | 977 | */ |
| 925 | | -void wiki_convert(Blob *pIn, Blob *pOut, int ops){ |
| 978 | +void wiki_convert(Blob *pIn, Blob *pOut){ |
| 926 | 979 | char *z; |
| 927 | 980 | Renderer renderer; |
| 928 | 981 | |
| 929 | 982 | memset(&renderer, 0, sizeof(renderer)); |
| 930 | 983 | renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH; |
| 931 | | - renderer.pOut = pOut; |
| 984 | + if( pOut ){ |
| 985 | + renderer.pOut = pOut; |
| 986 | + }else{ |
| 987 | + renderer.pOut = cgi_output_blob(); |
| 988 | + } |
| 932 | 989 | |
| 933 | 990 | z = blob_str(pIn); |
| 934 | 991 | wiki_render(&renderer, z); |
| 935 | 992 | while( renderer.nStack ){ |
| 936 | 993 | popStack(&renderer); |
| | @@ -946,8 +1003,8 @@ |
| 946 | 1003 | void test_wiki_render(void){ |
| 947 | 1004 | Blob in, out; |
| 948 | 1005 | if( g.argc!=3 ) usage("FILE"); |
| 949 | 1006 | blob_zero(&out); |
| 950 | 1007 | blob_read_from_file(&in, g.argv[2]); |
| 951 | | - wiki_convert(&in, &out, WIKI_HTML); |
| 1008 | + wiki_convert(&in, &out); |
| 952 | 1009 | blob_write_to_file(&out, "-"); |
| 953 | 1010 | } |
| 954 | 1011 | |