Fossil SCM

More refinement of the token selection for #hashtag and @name references.

stephan 2021-09-21 17:34 markdown-tagrefs
Commit 3363ab42c0bb1ac83c8e66df5b4e344cb67c62b81f73df3629651170dd2d7589
2 files changed: src/markdown.c (+42 -10), src/markdown_html.c (+6 -4)
+42 -10
--- src/markdown.c
+++ src/markdown.c
@@ -21,11 +21,10 @@
2121
2222
#include "config.h"
2323
#include "markdown.h"
2424
2525
#include <assert.h>
26
-#include <ctype.h>
2726
#include <string.h>
2827
#include <stdlib.h>
2928
3029
#define MKD_LI_END 8 /* internal list flag */
3130
@@ -44,11 +43,11 @@
4443
};
4544
4645
/* mkd_tagspan -- type of tagged <span> */
4746
enum mkd_tagspan {
4847
MKDT_ATREF, /* @name references, as in /chat attention targeting */
49
- MKDT_HASH, /* #hash tags, message IDs, etc. */
48
+ MKDT_HASHTAG, /* #hash tags, message IDs, etc. */
5049
};
5150
5251
/* mkd_renderer -- functions for rendering parsed data */
5352
struct mkd_renderer {
5453
/* document level callbacks */
@@ -891,18 +890,35 @@
891890
size_t size
892891
){
893892
size_t end;
894893
struct Blob work = BLOB_INITIALIZER;
895894
896
- if (size < 2 || !isalpha(data[1])) return 0;
897
- for (end = 2; (end < size) && isalnum(data[end]); ++end) /* */ ;
898
-
895
+ if(offset>0 && !fossil_isspace(data[-1])){
896
+ /* Only ever match if the *previous* character is
897
+ whitespace or we're at the start of the input. */
898
+ return 0;
899
+ }
900
+ /*fprintf(stderr,"@-REF: %.*s\n", (int)size, data);*/
901
+ if (size < 2 || !fossil_isalpha(data[1])) return 0;
902
+ for (end = 2; (end < size)
903
+ && (fossil_isalnum(data[end])
904
+ /* TODO: email addresses are legal fossil user names, but
905
+ parsing those is beyond our current ambitions.
906
+ Similarly, non-ASCII names are legal, but not
907
+ currently handled here. */
908
+ /*|| data[end] == '.' || data[end] == '_'
909
+ || data[end] == '-'*/);
910
+ ++end);
911
+ if(end<size){
912
+ if(!fossil_isspace(data[end])){
913
+ return 0;
914
+ }
915
+ }
899916
blob_init(&work, data + 1, end - 1);
900917
rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque);
901918
return end;
902919
}
903
-
904920
905921
/* char_hashref_tag -- '#' followed by "word" characters to tag
906922
* post numbers, hashtags, etc. */
907923
static size_t char_hashref_tag(
908924
struct Blob *ob,
@@ -912,16 +928,32 @@
912928
size_t size
913929
){
914930
size_t end;
915931
struct Blob work = BLOB_INITIALIZER;
916932
917
- if (size < 2 || !isalnum(data[1])) return 0;
918
- for (end = 2; (end < size) && (isalnum(data[end]) || data[end] == '.'); ++end) /* */ ;
919
-
933
+ if(offset>0 && !fossil_isspace(data[-1])){
934
+ /* Only ever match if the *previous* character is
935
+ whitespace or we're at the start of the input. */
936
+ return 0;
937
+ }
938
+ if(size < 2 || !fossil_isalnum(data[1])) return 0;
939
+ /*fprintf(stderr,"HASHREF: %.*s\n", (int)size, data);*/
940
+ for (end = 2; (end < size) && fossil_isalnum(data[end]); ++end);
941
+ /*TODO: in order to support detection of forum post-style
942
+ references, we need to recognize #X.Y, but only when X and Y are
943
+ both purely numeric and Y ends on a word/sentence
944
+ boundary.*/
945
+ if(end<size){
946
+ /* Only match if we end at a dot or space or end of input */
947
+ if(data[end]!='.' && !fossil_isspace(data[end])){
948
+ return 0;
949
+ }
950
+ }
920951
blob_init(&work, data + 1, end - 1);
921
- rndr->make.tagspan(ob, &work, MKDT_HASH, rndr->make.opaque);
952
+ rndr->make.tagspan(ob, &work, MKDT_HASHTAG, rndr->make.opaque);
922953
return end;
954
+ return 0;
923955
}
924956
925957
926958
/* char_langle_tag -- '<' when tags or autolinks are allowed */
927959
static size_t char_langle_tag(
928960
--- src/markdown.c
+++ src/markdown.c
@@ -21,11 +21,10 @@
21
22 #include "config.h"
23 #include "markdown.h"
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include <stdlib.h>
29
30 #define MKD_LI_END 8 /* internal list flag */
31
@@ -44,11 +43,11 @@
44 };
45
46 /* mkd_tagspan -- type of tagged <span> */
47 enum mkd_tagspan {
48 MKDT_ATREF, /* @name references, as in /chat attention targeting */
49 MKDT_HASH, /* #hash tags, message IDs, etc. */
50 };
51
52 /* mkd_renderer -- functions for rendering parsed data */
53 struct mkd_renderer {
54 /* document level callbacks */
@@ -891,18 +890,35 @@
891 size_t size
892 ){
893 size_t end;
894 struct Blob work = BLOB_INITIALIZER;
895
896 if (size < 2 || !isalpha(data[1])) return 0;
897 for (end = 2; (end < size) && isalnum(data[end]); ++end) /* */ ;
898
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899 blob_init(&work, data + 1, end - 1);
900 rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque);
901 return end;
902 }
903
904
905 /* char_hashref_tag -- '#' followed by "word" characters to tag
906 * post numbers, hashtags, etc. */
907 static size_t char_hashref_tag(
908 struct Blob *ob,
@@ -912,16 +928,32 @@
912 size_t size
913 ){
914 size_t end;
915 struct Blob work = BLOB_INITIALIZER;
916
917 if (size < 2 || !isalnum(data[1])) return 0;
918 for (end = 2; (end < size) && (isalnum(data[end]) || data[end] == '.'); ++end) /* */ ;
919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920 blob_init(&work, data + 1, end - 1);
921 rndr->make.tagspan(ob, &work, MKDT_HASH, rndr->make.opaque);
922 return end;
 
923 }
924
925
926 /* char_langle_tag -- '<' when tags or autolinks are allowed */
927 static size_t char_langle_tag(
928
--- src/markdown.c
+++ src/markdown.c
@@ -21,11 +21,10 @@
21
22 #include "config.h"
23 #include "markdown.h"
24
25 #include <assert.h>
 
26 #include <string.h>
27 #include <stdlib.h>
28
29 #define MKD_LI_END 8 /* internal list flag */
30
@@ -44,11 +43,11 @@
43 };
44
45 /* mkd_tagspan -- type of tagged <span> */
46 enum mkd_tagspan {
47 MKDT_ATREF, /* @name references, as in /chat attention targeting */
48 MKDT_HASHTAG, /* #hash tags, message IDs, etc. */
49 };
50
51 /* mkd_renderer -- functions for rendering parsed data */
52 struct mkd_renderer {
53 /* document level callbacks */
@@ -891,18 +890,35 @@
890 size_t size
891 ){
892 size_t end;
893 struct Blob work = BLOB_INITIALIZER;
894
895 if(offset>0 && !fossil_isspace(data[-1])){
896 /* Only ever match if the *previous* character is
897 whitespace or we're at the start of the input. */
898 return 0;
899 }
900 /*fprintf(stderr,"@-REF: %.*s\n", (int)size, data);*/
901 if (size < 2 || !fossil_isalpha(data[1])) return 0;
902 for (end = 2; (end < size)
903 && (fossil_isalnum(data[end])
904 /* TODO: email addresses are legal fossil user names, but
905 parsing those is beyond our current ambitions.
906 Similarly, non-ASCII names are legal, but not
907 currently handled here. */
908 /*|| data[end] == '.' || data[end] == '_'
909 || data[end] == '-'*/);
910 ++end);
911 if(end<size){
912 if(!fossil_isspace(data[end])){
913 return 0;
914 }
915 }
916 blob_init(&work, data + 1, end - 1);
917 rndr->make.tagspan(ob, &work, MKDT_ATREF, rndr->make.opaque);
918 return end;
919 }
 
920
921 /* char_hashref_tag -- '#' followed by "word" characters to tag
922 * post numbers, hashtags, etc. */
923 static size_t char_hashref_tag(
924 struct Blob *ob,
@@ -912,16 +928,32 @@
928 size_t size
929 ){
930 size_t end;
931 struct Blob work = BLOB_INITIALIZER;
932
933 if(offset>0 && !fossil_isspace(data[-1])){
934 /* Only ever match if the *previous* character is
935 whitespace or we're at the start of the input. */
936 return 0;
937 }
938 if(size < 2 || !fossil_isalnum(data[1])) return 0;
939 /*fprintf(stderr,"HASHREF: %.*s\n", (int)size, data);*/
940 for (end = 2; (end < size) && fossil_isalnum(data[end]); ++end);
941 /*TODO: in order to support detection of forum post-style
942 references, we need to recognize #X.Y, but only when X and Y are
943 both purely numeric and Y ends on a word/sentence
944 boundary.*/
945 if(end<size){
946 /* Only match if we end at a dot or space or end of input */
947 if(data[end]!='.' && !fossil_isspace(data[end])){
948 return 0;
949 }
950 }
951 blob_init(&work, data + 1, end - 1);
952 rndr->make.tagspan(ob, &work, MKDT_HASHTAG, rndr->make.opaque);
953 return end;
954 return 0;
955 }
956
957
958 /* char_langle_tag -- '<' when tags or autolinks are allowed */
959 static size_t char_langle_tag(
960
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -547,20 +547,22 @@
547547
void *opaque
548548
){
549549
if( text==0 ){
550550
/* no-op */
551551
}else{
552
- char c;
552
+ char cPrefix = '!';
553553
BLOB_APPEND_LITERAL(ob, "<span data-");
554554
switch (type) {
555
- case MKDT_ATREF: c='@'; BLOB_APPEND_LITERAL(ob, "atref"); break;
556
- case MKDT_HASH: c='#'; BLOB_APPEND_LITERAL(ob, "hash"); break;
555
+ case MKDT_ATREF:
556
+ cPrefix = '@'; BLOB_APPEND_LITERAL(ob, "atref"); break;
557
+ case MKDT_HASHTAG:
558
+ cPrefix = '#'; BLOB_APPEND_LITERAL(ob, "hashtag"); break;
557559
}
558560
BLOB_APPEND_LITERAL(ob, "=\"");
559561
html_quote(ob, blob_buffer(text), blob_size(text));
560562
BLOB_APPEND_LITERAL(ob, "\"");
561
- blob_appendf(ob, ">%c%b</span>", c, text);
563
+ blob_appendf(ob, ">%c%b</span>", cPrefix,text);
562564
}
563565
return 1;
564566
}
565567
566568
static int html_triple_emphasis(
567569
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -547,20 +547,22 @@
547 void *opaque
548 ){
549 if( text==0 ){
550 /* no-op */
551 }else{
552 char c;
553 BLOB_APPEND_LITERAL(ob, "<span data-");
554 switch (type) {
555 case MKDT_ATREF: c='@'; BLOB_APPEND_LITERAL(ob, "atref"); break;
556 case MKDT_HASH: c='#'; BLOB_APPEND_LITERAL(ob, "hash"); break;
 
 
557 }
558 BLOB_APPEND_LITERAL(ob, "=\"");
559 html_quote(ob, blob_buffer(text), blob_size(text));
560 BLOB_APPEND_LITERAL(ob, "\"");
561 blob_appendf(ob, ">%c%b</span>", c, text);
562 }
563 return 1;
564 }
565
566 static int html_triple_emphasis(
567
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -547,20 +547,22 @@
547 void *opaque
548 ){
549 if( text==0 ){
550 /* no-op */
551 }else{
552 char cPrefix = '!';
553 BLOB_APPEND_LITERAL(ob, "<span data-");
554 switch (type) {
555 case MKDT_ATREF:
556 cPrefix = '@'; BLOB_APPEND_LITERAL(ob, "atref"); break;
557 case MKDT_HASHTAG:
558 cPrefix = '#'; BLOB_APPEND_LITERAL(ob, "hashtag"); break;
559 }
560 BLOB_APPEND_LITERAL(ob, "=\"");
561 html_quote(ob, blob_buffer(text), blob_size(text));
562 BLOB_APPEND_LITERAL(ob, "\"");
563 blob_appendf(ob, ">%c%b</span>", cPrefix,text);
564 }
565 return 1;
566 }
567
568 static int html_triple_emphasis(
569

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button