Fossil SCM
Implement backslash-escapes in WIKI_MARKDOWN_SPAN mode. Add the --tokenize option to the test-wiki-render command.
Commit
6f9e2992447142037c8a69c566e611de8aab24eeff9c2005b374c94a01db9453
Parent
8f55b909c4d23d0…
1 file changed
+117
-11
+117
-11
| --- src/wikiformat.c | ||
| +++ src/wikiformat.c | ||
| @@ -436,11 +436,30 @@ | ||
| 436 | 436 | #define TOKEN_NUM_LI 7 /* " # " */ |
| 437 | 437 | #define TOKEN_ENUM 8 /* " \(?\d+[.)]? " */ |
| 438 | 438 | #define TOKEN_INDENT 9 /* " " */ |
| 439 | 439 | #define TOKEN_RAW 10 /* Output exactly (used when wiki-use-html==1) */ |
| 440 | 440 | #define TOKEN_AUTOLINK 11 /* <URL> */ |
| 441 | -#define TOKEN_TEXT 12 /* None of the above */ | |
| 441 | +#define TOKEN_MDSPAN 12 /* Markdown span characters: * _ ` */ | |
| 442 | +#define TOKEN_BACKSLASH 13 /* A backslash-escape */ | |
| 443 | +#define TOKEN_TEXT 14 /* None of the above */ | |
| 444 | + | |
| 445 | +static const char *wiki_token_names[] = { "", | |
| 446 | + "MARKUP", | |
| 447 | + "CHARACTER", | |
| 448 | + "LINK", | |
| 449 | + "PARAGRAPH", | |
| 450 | + "NEWLINE", | |
| 451 | + "BUL_LI", | |
| 452 | + "NUM_LI", | |
| 453 | + "ENUM", | |
| 454 | + "INDENT", | |
| 455 | + "RAW", | |
| 456 | + "AUTOLINK", | |
| 457 | + "MDSPAN", | |
| 458 | + "BACKSLASH", | |
| 459 | + "TEXT", | |
| 460 | +}; | |
| 442 | 461 | |
| 443 | 462 | /* |
| 444 | 463 | ** State flags. Save the lower 16 bits for the WIKI_* flags. |
| 445 | 464 | */ |
| 446 | 465 | #define AT_NEWLINE 0x0010000 /* At start of a line */ |
| @@ -545,20 +564,27 @@ | ||
| 545 | 564 | ** |
| 546 | 565 | ** < |
| 547 | 566 | ** & |
| 548 | 567 | ** \n |
| 549 | 568 | ** [ |
| 569 | +** _ * ` \ <-- WIKI_MARKDOWN_SPAN only. | |
| 550 | 570 | ** |
| 551 | 571 | ** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI. |
| 552 | 572 | ** The "\n" is only considered interesting if the flags contain ALLOW_WIKI. |
| 573 | +** The markdown span characters, _ * ` and \, are only considered if both | |
| 574 | +** ALLOW_WIKI and WIKI_MARKDOWN_SPAN are set. | |
| 553 | 575 | */ |
| 554 | 576 | static int textLength(const char *z, int flags){ |
| 555 | 577 | const char *zReject; |
| 556 | 578 | if( flags & ALLOW_WIKI ){ |
| 557 | - zReject = "<&[\n"; | |
| 579 | + if( flags & WIKI_MARKDOWN_SPAN ){ | |
| 580 | + zReject = "_*`\\\n[<&"; | |
| 581 | + }else{ | |
| 582 | + zReject = "\n[<&"; | |
| 583 | + } | |
| 558 | 584 | }else if( flags & ALLOW_LINKS ){ |
| 559 | - zReject = "<&["; | |
| 585 | + zReject = "[<&"; | |
| 560 | 586 | }else{ |
| 561 | 587 | zReject = "<&"; |
| 562 | 588 | } |
| 563 | 589 | return strcspn(z, zReject); |
| 564 | 590 | } |
| @@ -675,10 +701,16 @@ | ||
| 675 | 701 | /* |
| 676 | 702 | ** Get the next wiki token. |
| 677 | 703 | ** |
| 678 | 704 | ** z points to the start of a token. Return the number of |
| 679 | 705 | ** characters in that token. Write the token type into *pTokenType. |
| 706 | +** | |
| 707 | +** Only wiki-style [target] links are recognized by this routine. | |
| 708 | +** For markdown-style [display](target) links, this routine only | |
| 709 | +** sees the "[display]" part. But the caller will recognize that | |
| 710 | +** "(target)" follows immediately afterwards and deal with that, if | |
| 711 | +** markdown-style hyperlinks are enabled. | |
| 680 | 712 | */ |
| 681 | 713 | static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){ |
| 682 | 714 | int n; |
| 683 | 715 | if( z[0]=='<' ){ |
| 684 | 716 | n = html_tag_length(z); |
| @@ -685,10 +717,12 @@ | ||
| 685 | 717 | if( n>0 ){ |
| 686 | 718 | *pTokenType = TOKEN_MARKUP; |
| 687 | 719 | return n; |
| 688 | 720 | } |
| 689 | 721 | if( z[1]=='h' |
| 722 | + && !p->inVerbatim | |
| 723 | + && (p->state & (ALLOW_WIKI|ALLOW_LINKS))==(ALLOW_WIKI|ALLOW_LINKS) | |
| 690 | 724 | && (strncmp(z,"<https://",9)==0 || strncmp(z,"<http://",8)==0) |
| 691 | 725 | ){ |
| 692 | 726 | for(n=8; z[n] && z[n]!='>'; n++){} |
| 693 | 727 | if( z[n]=='>' ){ |
| 694 | 728 | *pTokenType = TOKEN_AUTOLINK; |
| @@ -738,10 +772,22 @@ | ||
| 738 | 772 | } |
| 739 | 773 | } |
| 740 | 774 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 741 | 775 | *pTokenType = TOKEN_LINK; |
| 742 | 776 | return n; |
| 777 | + } | |
| 778 | + if( z[0]=='*' || z[0]=='_' || z[0]=='`' ){ | |
| 779 | + *pTokenType = TOKEN_MDSPAN; | |
| 780 | + return 1 + (z[1]==z[0]); | |
| 781 | + } | |
| 782 | + if( z[0]=='\\' ){ | |
| 783 | + if( z[1]==0 || fossil_isspace(z[1]) || (z[1]&0x80)!=0 ){ | |
| 784 | + *pTokenType = TOKEN_TEXT; | |
| 785 | + return 1; | |
| 786 | + } | |
| 787 | + *pTokenType = TOKEN_BACKSLASH; | |
| 788 | + return 2; | |
| 743 | 789 | } |
| 744 | 790 | }else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' && (n = linkLength(z))>0 ){ |
| 745 | 791 | *pTokenType = TOKEN_LINK; |
| 746 | 792 | return n; |
| 747 | 793 | } |
| @@ -752,10 +798,19 @@ | ||
| 752 | 798 | /* |
| 753 | 799 | ** Parse only Wiki links, return everything else as TOKEN_RAW. |
| 754 | 800 | ** |
| 755 | 801 | ** z points to the start of a token. Return the number of |
| 756 | 802 | ** characters in that token. Write the token type into *pTokenType. |
| 803 | +** | |
| 804 | +** Only wiki-style [target] links are recognized by this routine. | |
| 805 | +** For markdown-style [display](target) links, this routine only | |
| 806 | +** sees the "[display]" part. But the caller will recognize that | |
| 807 | +** "(target)" follows immediately afterwards and deal with that, if | |
| 808 | +** markdown-style hyperlinks are enabled. | |
| 809 | +** | |
| 810 | +** Auto-links ("<URL>") are not recognized at all, since they are | |
| 811 | +** not back-referenced. | |
| 757 | 812 | */ |
| 758 | 813 | static int nextRawToken(const char *z, Renderer *p, int *pTokenType){ |
| 759 | 814 | int n; |
| 760 | 815 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 761 | 816 | *pTokenType = TOKEN_LINK; |
| @@ -1644,10 +1699,21 @@ | ||
| 1644 | 1699 | p->state = savedState; |
| 1645 | 1700 | blob_append(p->pOut, zClose, -1); |
| 1646 | 1701 | } |
| 1647 | 1702 | break; |
| 1648 | 1703 | } |
| 1704 | + case TOKEN_BACKSLASH: { | |
| 1705 | + if( (p->state & WIKI_MARKDOWN_SPAN)==0 ){ | |
| 1706 | + /* Ignore backslashes in traditional Wiki */ | |
| 1707 | + blob_append_char(p->pOut, '\\'); | |
| 1708 | + n = 1; | |
| 1709 | + }else{ | |
| 1710 | + blob_append_char(p->pOut, z[1]); | |
| 1711 | + } | |
| 1712 | + break; | |
| 1713 | + } | |
| 1714 | + case TOKEN_MDSPAN: | |
| 1649 | 1715 | case TOKEN_TEXT: { |
| 1650 | 1716 | int i; |
| 1651 | 1717 | for(i=0; i<n && fossil_isspace(z[i]); i++){} |
| 1652 | 1718 | if( i<n ) startAutoParagraph(p); |
| 1653 | 1719 | blob_append(p->pOut, z, n); |
| @@ -1906,10 +1972,44 @@ | ||
| 1906 | 1972 | popStack(&renderer); |
| 1907 | 1973 | } |
| 1908 | 1974 | blob_append_char(renderer.pOut, '\n'); |
| 1909 | 1975 | free(renderer.aStack); |
| 1910 | 1976 | } |
| 1977 | + | |
| 1978 | +/* | |
| 1979 | +** Output a tokenization of the input file. Debugging use only. | |
| 1980 | +*/ | |
| 1981 | +static void test_tokenize(Blob *pIn, Blob *pOut, int flags){ | |
| 1982 | + Renderer renderer; | |
| 1983 | + int tokenType; | |
| 1984 | + int n; | |
| 1985 | + int wikiHtmlOnly = (flags & (WIKI_HTMLONLY | WIKI_LINKSONLY))!=0; | |
| 1986 | + char *z = blob_str(pIn); | |
| 1987 | + | |
| 1988 | + /* Make sure the attribute constants and names still align | |
| 1989 | + ** following changes in the attribute list. */ | |
| 1990 | + assert( fossil_strcmp(aAttribute[ATTR_WIDTH].zName, "width")==0 ); | |
| 1991 | + | |
| 1992 | + memset(&renderer, 0, sizeof(renderer)); | |
| 1993 | + renderer.renderFlags = flags; | |
| 1994 | + renderer.state = ALLOW_WIKI|flags; | |
| 1995 | + while( z[0] ){ | |
| 1996 | + char cSave; | |
| 1997 | + if( wikiHtmlOnly ){ | |
| 1998 | + n = nextRawToken(z, &renderer, &tokenType); | |
| 1999 | + }else{ | |
| 2000 | + n = nextWikiToken(z, &renderer, &tokenType); | |
| 2001 | + } | |
| 2002 | + cSave = z[n]; | |
| 2003 | + z[n] = 0; | |
| 2004 | + blob_appendf(pOut, "%-12s %z\n", | |
| 2005 | + wiki_token_names[tokenType], | |
| 2006 | + encode_json_string_literal(z, 1, 0)); | |
| 2007 | + z[n] = cSave; | |
| 2008 | + z += n; | |
| 2009 | + } | |
| 2010 | +} | |
| 1911 | 2011 | |
| 1912 | 2012 | /* |
| 1913 | 2013 | ** COMMAND: test-wiki-render |
| 1914 | 2014 | ** |
| 1915 | 2015 | ** Usage: %fossil test-wiki-render FILE [OPTIONS] |
| @@ -1925,15 +2025,16 @@ | ||
| 1925 | 2025 | ** --linksonly Set the WIKI_LINKSONLY flag |
| 1926 | 2026 | ** --md-span Allow markdown span syntax: links and emphasis marks |
| 1927 | 2027 | ** --nobadlinks Set the WIKI_NOBADLINKS flag |
| 1928 | 2028 | ** --noblock Set the WIKI_NOBLOCK flag |
| 1929 | 2029 | ** --text Run the output through html_to_plaintext(). |
| 2030 | +** --tokenize Output a tokenization of the input file | |
| 1930 | 2031 | */ |
| 1931 | 2032 | void test_wiki_render(void){ |
| 1932 | 2033 | Blob in, out; |
| 1933 | 2034 | int flags = 0; |
| 1934 | - int bText; | |
| 2035 | + int bText, bTokenize; | |
| 1935 | 2036 | if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS; |
| 1936 | 2037 | if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY; |
| 1937 | 2038 | if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY; |
| 1938 | 2039 | if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS; |
| 1939 | 2040 | if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE; |
| @@ -1941,22 +2042,27 @@ | ||
| 1941 | 2042 | if( find_option("md-span",0,0)!=0 ) flags |= WIKI_MARKDOWN_SPAN; |
| 1942 | 2043 | if( find_option("dark-pikchr",0,0)!=0 ){ |
| 1943 | 2044 | pikchr_to_html_add_flags( PIKCHR_PROCESS_DARK_MODE ); |
| 1944 | 2045 | } |
| 1945 | 2046 | bText = find_option("text",0,0)!=0; |
| 2047 | + bTokenize = find_option("tokenize",0,0)!=0; | |
| 1946 | 2048 | db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0); |
| 1947 | 2049 | verify_all_options(); |
| 1948 | 2050 | if( g.argc!=3 ) usage("FILE"); |
| 1949 | 2051 | blob_zero(&out); |
| 1950 | 2052 | blob_read_from_file(&in, g.argv[2], ExtFILE); |
| 1951 | - wiki_convert(&in, &out, flags); | |
| 1952 | - if( bText ){ | |
| 1953 | - Blob txt; | |
| 1954 | - blob_init(&txt, 0, 0); | |
| 1955 | - html_to_plaintext(blob_str(&out),&txt); | |
| 1956 | - blob_reset(&out); | |
| 1957 | - out = txt; | |
| 2053 | + if( bTokenize ){ | |
| 2054 | + test_tokenize(&in, &out, flags); | |
| 2055 | + }else{ | |
| 2056 | + wiki_convert(&in, &out, flags); | |
| 2057 | + if( bText ){ | |
| 2058 | + Blob txt; | |
| 2059 | + blob_init(&txt, 0, 0); | |
| 2060 | + html_to_plaintext(blob_str(&out),&txt); | |
| 2061 | + blob_reset(&out); | |
| 2062 | + out = txt; | |
| 2063 | + } | |
| 1958 | 2064 | } |
| 1959 | 2065 | blob_write_to_file(&out, "-"); |
| 1960 | 2066 | } |
| 1961 | 2067 | |
| 1962 | 2068 | /* |
| 1963 | 2069 |
| --- src/wikiformat.c | |
| +++ src/wikiformat.c | |
| @@ -436,11 +436,30 @@ | |
| 436 | #define TOKEN_NUM_LI 7 /* " # " */ |
| 437 | #define TOKEN_ENUM 8 /* " \(?\d+[.)]? " */ |
| 438 | #define TOKEN_INDENT 9 /* " " */ |
| 439 | #define TOKEN_RAW 10 /* Output exactly (used when wiki-use-html==1) */ |
| 440 | #define TOKEN_AUTOLINK 11 /* <URL> */ |
| 441 | #define TOKEN_TEXT 12 /* None of the above */ |
| 442 | |
| 443 | /* |
| 444 | ** State flags. Save the lower 16 bits for the WIKI_* flags. |
| 445 | */ |
| 446 | #define AT_NEWLINE 0x0010000 /* At start of a line */ |
| @@ -545,20 +564,27 @@ | |
| 545 | ** |
| 546 | ** < |
| 547 | ** & |
| 548 | ** \n |
| 549 | ** [ |
| 550 | ** |
| 551 | ** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI. |
| 552 | ** The "\n" is only considered interesting if the flags contain ALLOW_WIKI. |
| 553 | */ |
| 554 | static int textLength(const char *z, int flags){ |
| 555 | const char *zReject; |
| 556 | if( flags & ALLOW_WIKI ){ |
| 557 | zReject = "<&[\n"; |
| 558 | }else if( flags & ALLOW_LINKS ){ |
| 559 | zReject = "<&["; |
| 560 | }else{ |
| 561 | zReject = "<&"; |
| 562 | } |
| 563 | return strcspn(z, zReject); |
| 564 | } |
| @@ -675,10 +701,16 @@ | |
| 675 | /* |
| 676 | ** Get the next wiki token. |
| 677 | ** |
| 678 | ** z points to the start of a token. Return the number of |
| 679 | ** characters in that token. Write the token type into *pTokenType. |
| 680 | */ |
| 681 | static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){ |
| 682 | int n; |
| 683 | if( z[0]=='<' ){ |
| 684 | n = html_tag_length(z); |
| @@ -685,10 +717,12 @@ | |
| 685 | if( n>0 ){ |
| 686 | *pTokenType = TOKEN_MARKUP; |
| 687 | return n; |
| 688 | } |
| 689 | if( z[1]=='h' |
| 690 | && (strncmp(z,"<https://",9)==0 || strncmp(z,"<http://",8)==0) |
| 691 | ){ |
| 692 | for(n=8; z[n] && z[n]!='>'; n++){} |
| 693 | if( z[n]=='>' ){ |
| 694 | *pTokenType = TOKEN_AUTOLINK; |
| @@ -738,10 +772,22 @@ | |
| 738 | } |
| 739 | } |
| 740 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 741 | *pTokenType = TOKEN_LINK; |
| 742 | return n; |
| 743 | } |
| 744 | }else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' && (n = linkLength(z))>0 ){ |
| 745 | *pTokenType = TOKEN_LINK; |
| 746 | return n; |
| 747 | } |
| @@ -752,10 +798,19 @@ | |
| 752 | /* |
| 753 | ** Parse only Wiki links, return everything else as TOKEN_RAW. |
| 754 | ** |
| 755 | ** z points to the start of a token. Return the number of |
| 756 | ** characters in that token. Write the token type into *pTokenType. |
| 757 | */ |
| 758 | static int nextRawToken(const char *z, Renderer *p, int *pTokenType){ |
| 759 | int n; |
| 760 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 761 | *pTokenType = TOKEN_LINK; |
| @@ -1644,10 +1699,21 @@ | |
| 1644 | p->state = savedState; |
| 1645 | blob_append(p->pOut, zClose, -1); |
| 1646 | } |
| 1647 | break; |
| 1648 | } |
| 1649 | case TOKEN_TEXT: { |
| 1650 | int i; |
| 1651 | for(i=0; i<n && fossil_isspace(z[i]); i++){} |
| 1652 | if( i<n ) startAutoParagraph(p); |
| 1653 | blob_append(p->pOut, z, n); |
| @@ -1906,10 +1972,44 @@ | |
| 1906 | popStack(&renderer); |
| 1907 | } |
| 1908 | blob_append_char(renderer.pOut, '\n'); |
| 1909 | free(renderer.aStack); |
| 1910 | } |
| 1911 | |
| 1912 | /* |
| 1913 | ** COMMAND: test-wiki-render |
| 1914 | ** |
| 1915 | ** Usage: %fossil test-wiki-render FILE [OPTIONS] |
| @@ -1925,15 +2025,16 @@ | |
| 1925 | ** --linksonly Set the WIKI_LINKSONLY flag |
| 1926 | ** --md-span Allow markdown span syntax: links and emphasis marks |
| 1927 | ** --nobadlinks Set the WIKI_NOBADLINKS flag |
| 1928 | ** --noblock Set the WIKI_NOBLOCK flag |
| 1929 | ** --text Run the output through html_to_plaintext(). |
| 1930 | */ |
| 1931 | void test_wiki_render(void){ |
| 1932 | Blob in, out; |
| 1933 | int flags = 0; |
| 1934 | int bText; |
| 1935 | if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS; |
| 1936 | if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY; |
| 1937 | if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY; |
| 1938 | if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS; |
| 1939 | if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE; |
| @@ -1941,22 +2042,27 @@ | |
| 1941 | if( find_option("md-span",0,0)!=0 ) flags |= WIKI_MARKDOWN_SPAN; |
| 1942 | if( find_option("dark-pikchr",0,0)!=0 ){ |
| 1943 | pikchr_to_html_add_flags( PIKCHR_PROCESS_DARK_MODE ); |
| 1944 | } |
| 1945 | bText = find_option("text",0,0)!=0; |
| 1946 | db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0); |
| 1947 | verify_all_options(); |
| 1948 | if( g.argc!=3 ) usage("FILE"); |
| 1949 | blob_zero(&out); |
| 1950 | blob_read_from_file(&in, g.argv[2], ExtFILE); |
| 1951 | wiki_convert(&in, &out, flags); |
| 1952 | if( bText ){ |
| 1953 | Blob txt; |
| 1954 | blob_init(&txt, 0, 0); |
| 1955 | html_to_plaintext(blob_str(&out),&txt); |
| 1956 | blob_reset(&out); |
| 1957 | out = txt; |
| 1958 | } |
| 1959 | blob_write_to_file(&out, "-"); |
| 1960 | } |
| 1961 | |
| 1962 | /* |
| 1963 |
| --- src/wikiformat.c | |
| +++ src/wikiformat.c | |
| @@ -436,11 +436,30 @@ | |
| 436 | #define TOKEN_NUM_LI 7 /* " # " */ |
| 437 | #define TOKEN_ENUM 8 /* " \(?\d+[.)]? " */ |
| 438 | #define TOKEN_INDENT 9 /* " " */ |
| 439 | #define TOKEN_RAW 10 /* Output exactly (used when wiki-use-html==1) */ |
| 440 | #define TOKEN_AUTOLINK 11 /* <URL> */ |
| 441 | #define TOKEN_MDSPAN 12 /* Markdown span characters: * _ ` */ |
| 442 | #define TOKEN_BACKSLASH 13 /* A backslash-escape */ |
| 443 | #define TOKEN_TEXT 14 /* None of the above */ |
| 444 | |
| 445 | static const char *wiki_token_names[] = { "", |
| 446 | "MARKUP", |
| 447 | "CHARACTER", |
| 448 | "LINK", |
| 449 | "PARAGRAPH", |
| 450 | "NEWLINE", |
| 451 | "BUL_LI", |
| 452 | "NUM_LI", |
| 453 | "ENUM", |
| 454 | "INDENT", |
| 455 | "RAW", |
| 456 | "AUTOLINK", |
| 457 | "MDSPAN", |
| 458 | "BACKSLASH", |
| 459 | "TEXT", |
| 460 | }; |
| 461 | |
| 462 | /* |
| 463 | ** State flags. Save the lower 16 bits for the WIKI_* flags. |
| 464 | */ |
| 465 | #define AT_NEWLINE 0x0010000 /* At start of a line */ |
| @@ -545,20 +564,27 @@ | |
| 564 | ** |
| 565 | ** < |
| 566 | ** & |
| 567 | ** \n |
| 568 | ** [ |
| 569 | ** _ * ` \ <-- WIKI_MARKDOWN_SPAN only. |
| 570 | ** |
| 571 | ** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI. |
| 572 | ** The "\n" is only considered interesting if the flags contain ALLOW_WIKI. |
| 573 | ** The markdown span characters, _ * ` and \, are only considered if both |
| 574 | ** ALLOW_WIKI and WIKI_MARKDOWN_SPAN are set. |
| 575 | */ |
| 576 | static int textLength(const char *z, int flags){ |
| 577 | const char *zReject; |
| 578 | if( flags & ALLOW_WIKI ){ |
| 579 | if( flags & WIKI_MARKDOWN_SPAN ){ |
| 580 | zReject = "_*`\\\n[<&"; |
| 581 | }else{ |
| 582 | zReject = "\n[<&"; |
| 583 | } |
| 584 | }else if( flags & ALLOW_LINKS ){ |
| 585 | zReject = "[<&"; |
| 586 | }else{ |
| 587 | zReject = "<&"; |
| 588 | } |
| 589 | return strcspn(z, zReject); |
| 590 | } |
| @@ -675,10 +701,16 @@ | |
| 701 | /* |
| 702 | ** Get the next wiki token. |
| 703 | ** |
| 704 | ** z points to the start of a token. Return the number of |
| 705 | ** characters in that token. Write the token type into *pTokenType. |
| 706 | ** |
| 707 | ** Only wiki-style [target] links are recognized by this routine. |
| 708 | ** For markdown-style [display](target) links, this routine only |
| 709 | ** sees the "[display]" part. But the caller will recognize that |
| 710 | ** "(target)" follows immediately afterwards and deal with that, if |
| 711 | ** markdown-style hyperlinks are enabled. |
| 712 | */ |
| 713 | static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){ |
| 714 | int n; |
| 715 | if( z[0]=='<' ){ |
| 716 | n = html_tag_length(z); |
| @@ -685,10 +717,12 @@ | |
| 717 | if( n>0 ){ |
| 718 | *pTokenType = TOKEN_MARKUP; |
| 719 | return n; |
| 720 | } |
| 721 | if( z[1]=='h' |
| 722 | && !p->inVerbatim |
| 723 | && (p->state & (ALLOW_WIKI|ALLOW_LINKS))==(ALLOW_WIKI|ALLOW_LINKS) |
| 724 | && (strncmp(z,"<https://",9)==0 || strncmp(z,"<http://",8)==0) |
| 725 | ){ |
| 726 | for(n=8; z[n] && z[n]!='>'; n++){} |
| 727 | if( z[n]=='>' ){ |
| 728 | *pTokenType = TOKEN_AUTOLINK; |
| @@ -738,10 +772,22 @@ | |
| 772 | } |
| 773 | } |
| 774 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 775 | *pTokenType = TOKEN_LINK; |
| 776 | return n; |
| 777 | } |
| 778 | if( z[0]=='*' || z[0]=='_' || z[0]=='`' ){ |
| 779 | *pTokenType = TOKEN_MDSPAN; |
| 780 | return 1 + (z[1]==z[0]); |
| 781 | } |
| 782 | if( z[0]=='\\' ){ |
| 783 | if( z[1]==0 || fossil_isspace(z[1]) || (z[1]&0x80)!=0 ){ |
| 784 | *pTokenType = TOKEN_TEXT; |
| 785 | return 1; |
| 786 | } |
| 787 | *pTokenType = TOKEN_BACKSLASH; |
| 788 | return 2; |
| 789 | } |
| 790 | }else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' && (n = linkLength(z))>0 ){ |
| 791 | *pTokenType = TOKEN_LINK; |
| 792 | return n; |
| 793 | } |
| @@ -752,10 +798,19 @@ | |
| 798 | /* |
| 799 | ** Parse only Wiki links, return everything else as TOKEN_RAW. |
| 800 | ** |
| 801 | ** z points to the start of a token. Return the number of |
| 802 | ** characters in that token. Write the token type into *pTokenType. |
| 803 | ** |
| 804 | ** Only wiki-style [target] links are recognized by this routine. |
| 805 | ** For markdown-style [display](target) links, this routine only |
| 806 | ** sees the "[display]" part. But the caller will recognize that |
| 807 | ** "(target)" follows immediately afterwards and deal with that, if |
| 808 | ** markdown-style hyperlinks are enabled. |
| 809 | ** |
| 810 | ** Auto-links ("<URL>") are not recognized at all, since they are |
| 811 | ** not back-referenced. |
| 812 | */ |
| 813 | static int nextRawToken(const char *z, Renderer *p, int *pTokenType){ |
| 814 | int n; |
| 815 | if( z[0]=='[' && (n = linkLength(z))>0 ){ |
| 816 | *pTokenType = TOKEN_LINK; |
| @@ -1644,10 +1699,21 @@ | |
| 1699 | p->state = savedState; |
| 1700 | blob_append(p->pOut, zClose, -1); |
| 1701 | } |
| 1702 | break; |
| 1703 | } |
| 1704 | case TOKEN_BACKSLASH: { |
| 1705 | if( (p->state & WIKI_MARKDOWN_SPAN)==0 ){ |
| 1706 | /* Ignore backslashes in traditional Wiki */ |
| 1707 | blob_append_char(p->pOut, '\\'); |
| 1708 | n = 1; |
| 1709 | }else{ |
| 1710 | blob_append_char(p->pOut, z[1]); |
| 1711 | } |
| 1712 | break; |
| 1713 | } |
| 1714 | case TOKEN_MDSPAN: |
| 1715 | case TOKEN_TEXT: { |
| 1716 | int i; |
| 1717 | for(i=0; i<n && fossil_isspace(z[i]); i++){} |
| 1718 | if( i<n ) startAutoParagraph(p); |
| 1719 | blob_append(p->pOut, z, n); |
| @@ -1906,10 +1972,44 @@ | |
| 1972 | popStack(&renderer); |
| 1973 | } |
| 1974 | blob_append_char(renderer.pOut, '\n'); |
| 1975 | free(renderer.aStack); |
| 1976 | } |
| 1977 | |
| 1978 | /* |
| 1979 | ** Output a tokenization of the input file. Debugging use only. |
| 1980 | */ |
| 1981 | static void test_tokenize(Blob *pIn, Blob *pOut, int flags){ |
| 1982 | Renderer renderer; |
| 1983 | int tokenType; |
| 1984 | int n; |
| 1985 | int wikiHtmlOnly = (flags & (WIKI_HTMLONLY | WIKI_LINKSONLY))!=0; |
| 1986 | char *z = blob_str(pIn); |
| 1987 | |
| 1988 | /* Make sure the attribute constants and names still align |
| 1989 | ** following changes in the attribute list. */ |
| 1990 | assert( fossil_strcmp(aAttribute[ATTR_WIDTH].zName, "width")==0 ); |
| 1991 | |
| 1992 | memset(&renderer, 0, sizeof(renderer)); |
| 1993 | renderer.renderFlags = flags; |
| 1994 | renderer.state = ALLOW_WIKI|flags; |
| 1995 | while( z[0] ){ |
| 1996 | char cSave; |
| 1997 | if( wikiHtmlOnly ){ |
| 1998 | n = nextRawToken(z, &renderer, &tokenType); |
| 1999 | }else{ |
| 2000 | n = nextWikiToken(z, &renderer, &tokenType); |
| 2001 | } |
| 2002 | cSave = z[n]; |
| 2003 | z[n] = 0; |
| 2004 | blob_appendf(pOut, "%-12s %z\n", |
| 2005 | wiki_token_names[tokenType], |
| 2006 | encode_json_string_literal(z, 1, 0)); |
| 2007 | z[n] = cSave; |
| 2008 | z += n; |
| 2009 | } |
| 2010 | } |
| 2011 | |
| 2012 | /* |
| 2013 | ** COMMAND: test-wiki-render |
| 2014 | ** |
| 2015 | ** Usage: %fossil test-wiki-render FILE [OPTIONS] |
| @@ -1925,15 +2025,16 @@ | |
| 2025 | ** --linksonly Set the WIKI_LINKSONLY flag |
| 2026 | ** --md-span Allow markdown span syntax: links and emphasis marks |
| 2027 | ** --nobadlinks Set the WIKI_NOBADLINKS flag |
| 2028 | ** --noblock Set the WIKI_NOBLOCK flag |
| 2029 | ** --text Run the output through html_to_plaintext(). |
| 2030 | ** --tokenize Output a tokenization of the input file |
| 2031 | */ |
| 2032 | void test_wiki_render(void){ |
| 2033 | Blob in, out; |
| 2034 | int flags = 0; |
| 2035 | int bText, bTokenize; |
| 2036 | if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS; |
| 2037 | if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY; |
| 2038 | if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY; |
| 2039 | if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS; |
| 2040 | if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE; |
| @@ -1941,22 +2042,27 @@ | |
| 2042 | if( find_option("md-span",0,0)!=0 ) flags |= WIKI_MARKDOWN_SPAN; |
| 2043 | if( find_option("dark-pikchr",0,0)!=0 ){ |
| 2044 | pikchr_to_html_add_flags( PIKCHR_PROCESS_DARK_MODE ); |
| 2045 | } |
| 2046 | bText = find_option("text",0,0)!=0; |
| 2047 | bTokenize = find_option("tokenize",0,0)!=0; |
| 2048 | db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0); |
| 2049 | verify_all_options(); |
| 2050 | if( g.argc!=3 ) usage("FILE"); |
| 2051 | blob_zero(&out); |
| 2052 | blob_read_from_file(&in, g.argv[2], ExtFILE); |
| 2053 | if( bTokenize ){ |
| 2054 | test_tokenize(&in, &out, flags); |
| 2055 | }else{ |
| 2056 | wiki_convert(&in, &out, flags); |
| 2057 | if( bText ){ |
| 2058 | Blob txt; |
| 2059 | blob_init(&txt, 0, 0); |
| 2060 | html_to_plaintext(blob_str(&out),&txt); |
| 2061 | blob_reset(&out); |
| 2062 | out = txt; |
| 2063 | } |
| 2064 | } |
| 2065 | blob_write_to_file(&out, "-"); |
| 2066 | } |
| 2067 | |
| 2068 | /* |
| 2069 |