Fossil SCM
More refinement of the token selection for #hashtag and @name references.
Commit
3363ab42c0bb1ac83c8e66df5b4e344cb67c62b81f73df3629651170dd2d7589
Parent
e211f1ab429f94c…
2 files changed
+42
-10
+6
-4
+42
-10
| --- src/markdown.c | ||
| +++ src/markdown.c | ||
| @@ -21,11 +21,10 @@ | ||
| 21 | 21 | |
| 22 | 22 | #include "config.h" |
| 23 | 23 | #include "markdown.h" |
| 24 | 24 | |
| 25 | 25 | #include <assert.h> |
| 26 | -#include <ctype.h> | |
| 27 | 26 | #include <string.h> |
| 28 | 27 | #include <stdlib.h> |
| 29 | 28 | |
| 30 | 29 | #define MKD_LI_END 8 /* internal list flag */ |
| 31 | 30 | |
| @@ -44,11 +43,11 @@ | ||
| 44 | 43 | }; |
| 45 | 44 | |
| 46 | 45 | /* mkd_tagspan -- type of tagged <span> */ |
| 47 | 46 | enum mkd_tagspan { |
| 48 | 47 | MKDT_ATREF, /* @name references, as in /chat attention targeting */ |
| 49 | - MKDT_HASH, /* #hash tags, message IDs, etc. */ | |
| 48 | + MKDT_HASHTAG, /* #hash tags, message IDs, etc. */ | |
| 50 | 49 | }; |
| 51 | 50 | |
| 52 | 51 | /* mkd_renderer -- functions for rendering parsed data */ |
| 53 | 52 | struct mkd_renderer { |
| 54 | 53 | /* document level callbacks */ |
| @@ -891,18 +890,35 @@ | ||
| 891 | 890 | size_t size |
| 892 | 891 | ){ |
| 893 | 892 | size_t end; |
| 894 | 893 | struct Blob work = BLOB_INITIALIZER; |
| 895 | 894 | |
| 896 | - if (size < 2 || !isalpha(data[1])) return 0; | |
| 897 | - for (end = 2; (end < size) && isalnum(data[end]); ++end) /* */ ; | |
| 898 | - | |
| 895 | + if(offset>0 && !fossil_isspace(data[-1])){ | |
| 896 | + /* Only ever match if the *previous* character is | |
| 897 | + whitespace or we're at the start of the input. */ | |
| 898 | + return 0; | |
| 899 | + } | |
| 900 | + /*fprintf(stderr,"@-REF: %.*s\n", (int)size, data);*/ | |
| 901 | + if (size < 2 || !fossil_isalpha(data[1])) return 0; | |
| 902 | + for (end = 2; (end < size) | |
| 903 | + && (fossil_isalnum(data[end]) | |
| 904 | + /* TODO: email addresses are legal fossil user names, but | |
| 905 | + parsing those is beyond our current ambitions. | |
| 906 | + Similarly, non-ASCII names are legal, but not | |
| 907 | + currently handled here. */ | |
| 908 | + /*|| data[end] == '.' || data[end] == '_' | |
| 909 | + || data[end] == '-'*/); | |
| 910 | + ++end); | |
| 911 | + if(end<size){ | |
| 912 | + if(!fossil_isspace(data[end])){ | |
| 913 | + return 0; | |
| 914 | + } | |
| 915 | + } | |
| 899 | 916 | blob_init(&work, data + 1, end - 1); |
| 900 | 917 | rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque); |
| 901 | 918 | return end; |
| 902 | 919 | } |
| 903 | - | |
| 904 | 920 | |
| 905 | 921 | /* char_hashref_tag -- '#' followed by "word" characters to tag |
| 906 | 922 | * post numbers, hashtags, etc. */ |
| 907 | 923 | static size_t char_hashref_tag( |
| 908 | 924 | struct Blob *ob, |
| @@ -912,16 +928,32 @@ | ||
| 912 | 928 | size_t size |
| 913 | 929 | ){ |
| 914 | 930 | size_t end; |
| 915 | 931 | struct Blob work = BLOB_INITIALIZER; |
| 916 | 932 | |
| 917 | - if (size < 2 || !isalnum(data[1])) return 0; | |
| 918 | - for (end = 2; (end < size) && (isalnum(data[end]) || data[end] == '.'); ++end) /* */ ; | |
| 919 | - | |
| 933 | + if(offset>0 && !fossil_isspace(data[-1])){ | |
| 934 | + /* Only ever match if the *previous* character is | |
| 935 | + whitespace or we're at the start of the input. */ | |
| 936 | + return 0; | |
| 937 | + } | |
| 938 | + if(size < 2 || !fossil_isalnum(data[1])) return 0; | |
| 939 | + /*fprintf(stderr,"HASHREF: %.*s\n", (int)size, data);*/ | |
| 940 | + for (end = 2; (end < size) && fossil_isalnum(data[end]); ++end); | |
| 941 | + /*TODO: in order to support detection of forum post-style | |
| 942 | + references, we need to recognize #X.Y, but only when X and Y are | |
| 943 | + both purely numeric and Y ends on a word/sentence | |
| 944 | + boundary.*/ | |
| 945 | + if(end<size){ | |
| 946 | + /* Only match if we end at a dot or space or end of input */ | |
| 947 | + if(data[end]!='.' && !fossil_isspace(data[end])){ | |
| 948 | + return 0; | |
| 949 | + } | |
| 950 | + } | |
| 920 | 951 | blob_init(&work, data + 1, end - 1); |
| 921 | - rndr->make.tagspan(ob, &work, MKDT_HASH, rndr->make.opaque); | |
| 952 | + rndr->make.tagspan(ob, &work, MKDT_HASHTAG, rndr->make.opaque); | |
| 922 | 953 | return end; |
| 954 | + return 0; | |
| 923 | 955 | } |
| 924 | 956 | |
| 925 | 957 | |
| 926 | 958 | /* char_langle_tag -- '<' when tags or autolinks are allowed */ |
| 927 | 959 | static size_t char_langle_tag( |
| 928 | 960 |
| --- src/markdown.c | |
| +++ src/markdown.c | |
| @@ -21,11 +21,10 @@ | |
| 21 | |
| 22 | #include "config.h" |
| 23 | #include "markdown.h" |
| 24 | |
| 25 | #include <assert.h> |
| 26 | #include <ctype.h> |
| 27 | #include <string.h> |
| 28 | #include <stdlib.h> |
| 29 | |
| 30 | #define MKD_LI_END 8 /* internal list flag */ |
| 31 | |
| @@ -44,11 +43,11 @@ | |
| 44 | }; |
| 45 | |
| 46 | /* mkd_tagspan -- type of tagged <span> */ |
| 47 | enum mkd_tagspan { |
| 48 | MKDT_ATREF, /* @name references, as in /chat attention targeting */ |
| 49 | MKDT_HASH, /* #hash tags, message IDs, etc. */ |
| 50 | }; |
| 51 | |
| 52 | /* mkd_renderer -- functions for rendering parsed data */ |
| 53 | struct mkd_renderer { |
| 54 | /* document level callbacks */ |
| @@ -891,18 +890,35 @@ | |
| 891 | size_t size |
| 892 | ){ |
| 893 | size_t end; |
| 894 | struct Blob work = BLOB_INITIALIZER; |
| 895 | |
| 896 | if (size < 2 || !isalpha(data[1])) return 0; |
| 897 | for (end = 2; (end < size) && isalnum(data[end]); ++end) /* */ ; |
| 898 | |
| 899 | blob_init(&work, data + 1, end - 1); |
| 900 | rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque); |
| 901 | return end; |
| 902 | } |
| 903 | |
| 904 | |
| 905 | /* char_hashref_tag -- '#' followed by "word" characters to tag |
| 906 | * post numbers, hashtags, etc. */ |
| 907 | static size_t char_hashref_tag( |
| 908 | struct Blob *ob, |
| @@ -912,16 +928,32 @@ | |
| 912 | size_t size |
| 913 | ){ |
| 914 | size_t end; |
| 915 | struct Blob work = BLOB_INITIALIZER; |
| 916 | |
| 917 | if (size < 2 || !isalnum(data[1])) return 0; |
| 918 | for (end = 2; (end < size) && (isalnum(data[end]) || data[end] == '.'); ++end) /* */ ; |
| 919 | |
| 920 | blob_init(&work, data + 1, end - 1); |
| 921 | rndr->make.tagspan(ob, &work, MKDT_HASH, rndr->make.opaque); |
| 922 | return end; |
| 923 | } |
| 924 | |
| 925 | |
| 926 | /* char_langle_tag -- '<' when tags or autolinks are allowed */ |
| 927 | static size_t char_langle_tag( |
| 928 |
| --- src/markdown.c | |
| +++ src/markdown.c | |
| @@ -21,11 +21,10 @@ | |
| 21 | |
| 22 | #include "config.h" |
| 23 | #include "markdown.h" |
| 24 | |
| 25 | #include <assert.h> |
| 26 | #include <string.h> |
| 27 | #include <stdlib.h> |
| 28 | |
| 29 | #define MKD_LI_END 8 /* internal list flag */ |
| 30 | |
| @@ -44,11 +43,11 @@ | |
| 43 | }; |
| 44 | |
| 45 | /* mkd_tagspan -- type of tagged <span> */ |
| 46 | enum mkd_tagspan { |
| 47 | MKDT_ATREF, /* @name references, as in /chat attention targeting */ |
| 48 | MKDT_HASHTAG, /* #hash tags, message IDs, etc. */ |
| 49 | }; |
| 50 | |
| 51 | /* mkd_renderer -- functions for rendering parsed data */ |
| 52 | struct mkd_renderer { |
| 53 | /* document level callbacks */ |
| @@ -891,18 +890,35 @@ | |
| 890 | size_t size |
| 891 | ){ |
| 892 | size_t end; |
| 893 | struct Blob work = BLOB_INITIALIZER; |
| 894 | |
| 895 | if(offset>0 && !fossil_isspace(data[-1])){ |
| 896 | /* Only ever match if the *previous* character is |
| 897 | whitespace or we're at the start of the input. */ |
| 898 | return 0; |
| 899 | } |
| 900 | /*fprintf(stderr,"@-REF: %.*s\n", (int)size, data);*/ |
| 901 | if (size < 2 || !fossil_isalpha(data[1])) return 0; |
| 902 | for (end = 2; (end < size) |
| 903 | && (fossil_isalnum(data[end]) |
| 904 | /* TODO: email addresses are legal fossil user names, but |
| 905 | parsing those is beyond our current ambitions. |
| 906 | Similarly, non-ASCII names are legal, but not |
| 907 | currently handled here. */ |
| 908 | /*|| data[end] == '.' || data[end] == '_' |
| 909 | || data[end] == '-'*/); |
| 910 | ++end); |
| 911 | if(end<size){ |
| 912 | if(!fossil_isspace(data[end])){ |
| 913 | return 0; |
| 914 | } |
| 915 | } |
| 916 | blob_init(&work, data + 1, end - 1); |
| 917 | rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque); |
| 918 | return end; |
| 919 | } |
| 920 | |
| 921 | /* char_hashref_tag -- '#' followed by "word" characters to tag |
| 922 | * post numbers, hashtags, etc. */ |
| 923 | static size_t char_hashref_tag( |
| 924 | struct Blob *ob, |
| @@ -912,16 +928,32 @@ | |
| 928 | size_t size |
| 929 | ){ |
| 930 | size_t end; |
| 931 | struct Blob work = BLOB_INITIALIZER; |
| 932 | |
| 933 | if(offset>0 && !fossil_isspace(data[-1])){ |
| 934 | /* Only ever match if the *previous* character is |
| 935 | whitespace or we're at the start of the input. */ |
| 936 | return 0; |
| 937 | } |
| 938 | if(size < 2 || !fossil_isalnum(data[1])) return 0; |
| 939 | /*fprintf(stderr,"HASHREF: %.*s\n", (int)size, data);*/ |
| 940 | for (end = 2; (end < size) && fossil_isalnum(data[end]); ++end); |
| 941 | /*TODO: in order to support detection of forum post-style |
| 942 | references, we need to recognize #X.Y, but only when X and Y are |
| 943 | both purely numeric and Y ends on a word/sentence |
| 944 | boundary.*/ |
| 945 | if(end<size){ |
| 946 | /* Only match if we end at a dot or space or end of input */ |
| 947 | if(data[end]!='.' && !fossil_isspace(data[end])){ |
| 948 | return 0; |
| 949 | } |
| 950 | } |
| 951 | blob_init(&work, data + 1, end - 1); |
| 952 | rndr->make.tagspan(ob, &work, MKDT_HASHTAG, rndr->make.opaque); |
| 953 | return end; |
| 954 | return 0; |
| 955 | } |
| 956 | |
| 957 | |
| 958 | /* char_langle_tag -- '<' when tags or autolinks are allowed */ |
| 959 | static size_t char_langle_tag( |
| 960 |
+6
-4
| --- src/markdown_html.c | ||
| +++ src/markdown_html.c | ||
| @@ -547,20 +547,22 @@ | ||
| 547 | 547 | void *opaque |
| 548 | 548 | ){ |
| 549 | 549 | if( text==0 ){ |
| 550 | 550 | /* no-op */ |
| 551 | 551 | }else{ |
| 552 | - char c; | |
| 552 | + char cPrefix = '!'; | |
| 553 | 553 | BLOB_APPEND_LITERAL(ob, "<span data-"); |
| 554 | 554 | switch (type) { |
| 555 | - case MKDT_ATREF: c='@'; BLOB_APPEND_LITERAL(ob, "atref"); break; | |
| 556 | - case MKDT_HASH: c='#'; BLOB_APPEND_LITERAL(ob, "hash"); break; | |
| 555 | + case MKDT_ATREF: | |
| 556 | + cPrefix = '@'; BLOB_APPEND_LITERAL(ob, "atref"); break; | |
| 557 | + case MKDT_HASHTAG: | |
| 558 | + cPrefix = '#'; BLOB_APPEND_LITERAL(ob, "hashtag"); break; | |
| 557 | 559 | } |
| 558 | 560 | BLOB_APPEND_LITERAL(ob, "=\""); |
| 559 | 561 | html_quote(ob, blob_buffer(text), blob_size(text)); |
| 560 | 562 | BLOB_APPEND_LITERAL(ob, "\""); |
| 561 | - blob_appendf(ob, ">%c%b</span>", c, text); | |
| 563 | + blob_appendf(ob, ">%c%b</span>", cPrefix,text); | |
| 562 | 564 | } |
| 563 | 565 | return 1; |
| 564 | 566 | } |
| 565 | 567 | |
| 566 | 568 | static int html_triple_emphasis( |
| 567 | 569 |
| --- src/markdown_html.c | |
| +++ src/markdown_html.c | |
| @@ -547,20 +547,22 @@ | |
| 547 | void *opaque |
| 548 | ){ |
| 549 | if( text==0 ){ |
| 550 | /* no-op */ |
| 551 | }else{ |
| 552 | char c; |
| 553 | BLOB_APPEND_LITERAL(ob, "<span data-"); |
| 554 | switch (type) { |
| 555 | case MKDT_ATREF: c='@'; BLOB_APPEND_LITERAL(ob, "atref"); break; |
| 556 | case MKDT_HASH: c='#'; BLOB_APPEND_LITERAL(ob, "hash"); break; |
| 557 | } |
| 558 | BLOB_APPEND_LITERAL(ob, "=\""); |
| 559 | html_quote(ob, blob_buffer(text), blob_size(text)); |
| 560 | BLOB_APPEND_LITERAL(ob, "\""); |
| 561 | blob_appendf(ob, ">%c%b</span>", c, text); |
| 562 | } |
| 563 | return 1; |
| 564 | } |
| 565 | |
| 566 | static int html_triple_emphasis( |
| 567 |
| --- src/markdown_html.c | |
| +++ src/markdown_html.c | |
| @@ -547,20 +547,22 @@ | |
| 547 | void *opaque |
| 548 | ){ |
| 549 | if( text==0 ){ |
| 550 | /* no-op */ |
| 551 | }else{ |
| 552 | char cPrefix = '!'; |
| 553 | BLOB_APPEND_LITERAL(ob, "<span data-"); |
| 554 | switch (type) { |
| 555 | case MKDT_ATREF: |
| 556 | cPrefix = '@'; BLOB_APPEND_LITERAL(ob, "atref"); break; |
| 557 | case MKDT_HASHTAG: |
| 558 | cPrefix = '#'; BLOB_APPEND_LITERAL(ob, "hashtag"); break; |
| 559 | } |
| 560 | BLOB_APPEND_LITERAL(ob, "=\""); |
| 561 | html_quote(ob, blob_buffer(text), blob_size(text)); |
| 562 | BLOB_APPEND_LITERAL(ob, "\""); |
| 563 | blob_appendf(ob, ">%c%b</span>", cPrefix,text); |
| 564 | } |
| 565 | return 1; |
| 566 | } |
| 567 | |
| 568 | static int html_triple_emphasis( |
| 569 |