Fossil SCM
Do a better job of escaping stray HTML markup in snippet text.
Commit
bc8075de5fa2a43a6ac44fa45999d22eecbdfcb7
Parent
13d93e404a2d65c…
1 file changed
+51
-2
+51
-2
| --- src/search.c | ||
| +++ src/search.c | ||
| @@ -744,11 +744,11 @@ | ||
| 744 | 744 | "INSERT INTO x(label,url,score,date,snip) " |
| 745 | 745 | " SELECT ftsdocs.label," |
| 746 | 746 | " ftsdocs.url," |
| 747 | 747 | " rank(matchinfo(ftsidx,'pcsx'))," |
| 748 | 748 | " datetime(ftsdocs.mtime)," |
| 749 | - " snippet(ftsidx,'<mark>','</mark>')" | |
| 749 | + " snippet(ftsidx,'<mark>','</mark>',' ... ')" | |
| 750 | 750 | " FROM ftsidx, ftsdocs" |
| 751 | 751 | " WHERE ftsidx MATCH %Q" |
| 752 | 752 | " AND ftsdocs.rowid=ftsidx.docid", |
| 753 | 753 | zPattern |
| 754 | 754 | ); |
| @@ -772,10 +772,59 @@ | ||
| 772 | 772 | db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/); |
| 773 | 773 | #if SEARCH_DEBUG_RANK |
| 774 | 774 | db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); |
| 775 | 775 | #endif |
| 776 | 776 | } |
| 777 | + | |
| 778 | +/* | |
| 779 | +** If z[] is of the form "<mark>TEXT</mark>" where TEXT contains | |
| 780 | +** no white-space or punctuation, then return the length of the mark. | |
| 781 | +** If z[] is not of that form, return 0. | |
| 782 | +*/ | |
| 783 | +static int isSnippetMark(const char *z){ | |
| 784 | + int n; | |
| 785 | + if( strncmp(z,"<mark>",6)!=0 ) return 0; | |
| 786 | + n = 6; | |
| 787 | + while( fossil_isalnum(z[n]) ) n++; | |
| 788 | + if( strncmp(&z[n],"</mark>",7)!=0 ) return 0; | |
| 789 | + return n+7; | |
| 790 | +} | |
| 791 | + | |
| 792 | +/* | |
| 793 | +** Return a copy of zSnip (in memory obtained from fossil_malloc()) that | |
| 794 | +** has all "<" characters, other than those on <mark> and </mark>, | |
| 795 | +** converted into "&lt;". This is similar to htmlize() except that | |
| 796 | +** <mark> and </mark> are preserved. | |
| 797 | +*/ | |
| 798 | +static char *cleanSnippet(const char *zSnip){ | |
| 799 | + int i; | |
| 800 | + int n = 0; | |
| 801 | + char *z; | |
| 802 | + for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++; | |
| 803 | + z = fossil_malloc( i+n+1 ); | |
| 804 | + i = 0; | |
| 805 | + while( zSnip[0] ){ | |
| 806 | + if( zSnip[0]=='<' ){ | |
| 807 | + n = isSnippetMark(zSnip); | |
| 808 | + if( n ){ | |
| 809 | + memcpy(&z[i], zSnip, n); | |
| 810 | + zSnip += n; | |
| 811 | + i += n; | |
| 812 | + continue; | |
| 813 | + }else{ | |
| 814 | + memcpy(&z[i], "&lt;", 4); | |
| 815 | + i += 4; | |
| 816 | + zSnip++; | |
| 817 | + } | |
| 818 | + }else{ | |
| 819 | + z[i++] = zSnip[0]; | |
| 820 | + zSnip++; | |
| 821 | + } | |
| 822 | + } | |
| 823 | + z[i] = 0; | |
| 824 | + return z; | |
| 825 | +} | |
| 777 | 826 | |
| 778 | 827 | |
| 779 | 828 | /* |
| 780 | 829 | ** This routine generates web-page output for a search operation. |
| 781 | 830 | ** Other web-pages can invoke this routine to add search results |
| @@ -813,11 +862,11 @@ | ||
| 813 | 862 | if( nRow==0 ){ |
| 814 | 863 | @ <ol> |
| 815 | 864 | } |
| 816 | 865 | nRow++; |
| 817 | 866 | @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br> |
| 818 | - @ <span class='snippet'>%s(zSnippet)</span></li> | |
| 867 | + @ <span class='snippet'>%z(cleanSnippet(zSnippet))</span></li> | |
| 819 | 868 | } |
| 820 | 869 | db_finalize(&q); |
| 821 | 870 | if( nRow ){ |
| 822 | 871 | @ </ol> |
| 823 | 872 | } |
| 824 | 873 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -744,11 +744,11 @@ | |
| 744 | "INSERT INTO x(label,url,score,date,snip) " |
| 745 | " SELECT ftsdocs.label," |
| 746 | " ftsdocs.url," |
| 747 | " rank(matchinfo(ftsidx,'pcsx'))," |
| 748 | " datetime(ftsdocs.mtime)," |
| 749 | " snippet(ftsidx,'<mark>','</mark>')" |
| 750 | " FROM ftsidx, ftsdocs" |
| 751 | " WHERE ftsidx MATCH %Q" |
| 752 | " AND ftsdocs.rowid=ftsidx.docid", |
| 753 | zPattern |
| 754 | ); |
| @@ -772,10 +772,59 @@ | |
| 772 | db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/); |
| 773 | #if SEARCH_DEBUG_RANK |
| 774 | db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); |
| 775 | #endif |
| 776 | } |
| 777 | |
| 778 | |
| 779 | /* |
| 780 | ** This routine generates web-page output for a search operation. |
| 781 | ** Other web-pages can invoke this routine to add search results |
| @@ -813,11 +862,11 @@ | |
| 813 | if( nRow==0 ){ |
| 814 | @ <ol> |
| 815 | } |
| 816 | nRow++; |
| 817 | @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br> |
| 818 | @ <span class='snippet'>%s(zSnippet)</span></li> |
| 819 | } |
| 820 | db_finalize(&q); |
| 821 | if( nRow ){ |
| 822 | @ </ol> |
| 823 | } |
| 824 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -744,11 +744,11 @@ | |
| 744 | "INSERT INTO x(label,url,score,date,snip) " |
| 745 | " SELECT ftsdocs.label," |
| 746 | " ftsdocs.url," |
| 747 | " rank(matchinfo(ftsidx,'pcsx'))," |
| 748 | " datetime(ftsdocs.mtime)," |
| 749 | " snippet(ftsidx,'<mark>','</mark>',' ... ')" |
| 750 | " FROM ftsidx, ftsdocs" |
| 751 | " WHERE ftsidx MATCH %Q" |
| 752 | " AND ftsdocs.rowid=ftsidx.docid", |
| 753 | zPattern |
| 754 | ); |
| @@ -772,10 +772,59 @@ | |
| 772 | db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/); |
| 773 | #if SEARCH_DEBUG_RANK |
| 774 | db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); |
| 775 | #endif |
| 776 | } |
| 777 | |
| 778 | /* |
| 779 | ** If z[] is of the form "<mark>TEXT</mark>" where TEXT contains |
| 780 | ** no white-space or punctuation, then return the length of the mark. |
| 781 | ** If z[] is not of that form, return 0. |
| 782 | */ |
| 783 | static int isSnippetMark(const char *z){ |
| 784 | int n; |
| 785 | if( strncmp(z,"<mark>",6)!=0 ) return 0; |
| 786 | n = 6; |
| 787 | while( fossil_isalnum(z[n]) ) n++; |
| 788 | if( strncmp(&z[n],"</mark>",7)!=0 ) return 0; |
| 789 | return n+7; |
| 790 | } |
| 791 | |
| 792 | /* |
| 793 | ** Return a copy of zSnip (in memory obtained from fossil_malloc()) that |
| 794 | ** has all "<" characters, other than those on <mark> and </mark>, |
| 795 | ** converted into "&lt;". This is similar to htmlize() except that |
| 796 | ** <mark> and </mark> are preserved. |
| 797 | */ |
| 798 | static char *cleanSnippet(const char *zSnip){ |
| 799 | int i; |
| 800 | int n = 0; |
| 801 | char *z; |
| 802 | for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++; |
| 803 | z = fossil_malloc( i+n+1 ); |
| 804 | i = 0; |
| 805 | while( zSnip[0] ){ |
| 806 | if( zSnip[0]=='<' ){ |
| 807 | n = isSnippetMark(zSnip); |
| 808 | if( n ){ |
| 809 | memcpy(&z[i], zSnip, n); |
| 810 | zSnip += n; |
| 811 | i += n; |
| 812 | continue; |
| 813 | }else{ |
| 814 | memcpy(&z[i], "&lt;", 4); |
| 815 | i += 4; |
| 816 | zSnip++; |
| 817 | } |
| 818 | }else{ |
| 819 | z[i++] = zSnip[0]; |
| 820 | zSnip++; |
| 821 | } |
| 822 | } |
| 823 | z[i] = 0; |
| 824 | return z; |
| 825 | } |
| 826 | |
| 827 | |
| 828 | /* |
| 829 | ** This routine generates web-page output for a search operation. |
| 830 | ** Other web-pages can invoke this routine to add search results |
| @@ -813,11 +862,11 @@ | |
| 862 | if( nRow==0 ){ |
| 863 | @ <ol> |
| 864 | } |
| 865 | nRow++; |
| 866 | @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br> |
| 867 | @ <span class='snippet'>%z(cleanSnippet(zSnippet))</span></li> |
| 868 | } |
| 869 | db_finalize(&q); |
| 870 | if( nRow ){ |
| 871 | @ </ol> |
| 872 | } |
| 873 |