Fossil SCM

Do a better job of escaping stray HTML markup in snippet text.

drh 2015-02-03 13:58 trunk
Commit bc8075de5fa2a43a6ac44fa45999d22eecbdfcb7
1 file changed +51 -2
+51 -2
--- src/search.c
+++ src/search.c
@@ -744,11 +744,11 @@
744744
"INSERT INTO x(label,url,score,date,snip) "
745745
" SELECT ftsdocs.label,"
746746
" ftsdocs.url,"
747747
" rank(matchinfo(ftsidx,'pcsx')),"
748748
" datetime(ftsdocs.mtime),"
749
- " snippet(ftsidx,'<mark>','</mark>')"
749
+ " snippet(ftsidx,'<mark>','</mark>',' ... ')"
750750
" FROM ftsidx, ftsdocs"
751751
" WHERE ftsidx MATCH %Q"
752752
" AND ftsdocs.rowid=ftsidx.docid",
753753
zPattern
754754
);
@@ -772,10 +772,59 @@
772772
db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/);
773773
#if SEARCH_DEBUG_RANK
774774
db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
775775
#endif
776776
}
777
+
778
+/*
779
+** If z[] is of the form "<mark>TEXT</mark>" where TEXT contains
780
+** no white-space or punctuation, then return the length of the mark.
781
+** Otherwise, return 0.
782
+*/
783
+static int isSnippetMark(const char *z){
784
+ int n;
785
+ if( strncmp(z,"<mark>",6)!=0 ) return 0;
786
+ n = 6;
787
+ while( fossil_isalnum(z[n]) ) n++;
788
+ if( strncmp(&z[n],"</mark>",7)!=0 ) return 0;
789
+ return n+7;
790
+}
791
+
792
+/*
793
+** Return a copy of zSnip (in memory obtained from fossil_malloc()) that
794
+** has all "<" characters, other than those on <mark> and </mark>,
795
+** converted into "&lt;". This is similar to htmlize() except that
796
+** <mark> and </mark> are preserved.
797
+*/
798
+static char *cleanSnippet(const char *zSnip){
799
+ int i;
800
+ int n = 0;
801
+ char *z;
802
+ for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++;
803
+ z = fossil_malloc( i+n+1 );
804
+ i = 0;
805
+ while( zSnip[0] ){
806
+ if( zSnip[0]=='<' ){
807
+ n = isSnippetMark(zSnip);
808
+ if( n ){
809
+ memcpy(&z[i], zSnip, n);
810
+ zSnip += n;
811
+ i += n;
812
+ continue;
813
+ }else{
814
+ memcpy(&z[i], "&lt;", 4);
815
+ i += 4;
816
+ zSnip++;
817
+ }
818
+ }else{
819
+ z[i++] = zSnip[0];
820
+ zSnip++;
821
+ }
822
+ }
823
+ z[i] = 0;
824
+ return z;
825
+}
777826
778827
779828
/*
780829
** This routine generates web-page output for a search operation.
781830
** Other web-pages can invoke this routine to add search results
@@ -813,11 +862,11 @@
813862
if( nRow==0 ){
814863
@ <ol>
815864
}
816865
nRow++;
817866
@ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br>
818
- @ <span class='snippet'>%s(zSnippet)</span></li>
867
+ @ <span class='snippet'>%z(cleanSnippet(zSnippet))</span></li>
819868
}
820869
db_finalize(&q);
821870
if( nRow ){
822871
@ </ol>
823872
}
824873
--- src/search.c
+++ src/search.c
@@ -744,11 +744,11 @@
744 "INSERT INTO x(label,url,score,date,snip) "
745 " SELECT ftsdocs.label,"
746 " ftsdocs.url,"
747 " rank(matchinfo(ftsidx,'pcsx')),"
748 " datetime(ftsdocs.mtime),"
749 " snippet(ftsidx,'<mark>','</mark>')"
750 " FROM ftsidx, ftsdocs"
751 " WHERE ftsidx MATCH %Q"
752 " AND ftsdocs.rowid=ftsidx.docid",
753 zPattern
754 );
@@ -772,10 +772,59 @@
772 db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/);
773 #if SEARCH_DEBUG_RANK
774 db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
775 #endif
776 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777
778
779 /*
780 ** This routine generates web-page output for a search operation.
781 ** Other web-pages can invoke this routine to add search results
@@ -813,11 +862,11 @@
813 if( nRow==0 ){
814 @ <ol>
815 }
816 nRow++;
817 @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br>
818 @ <span class='snippet'>%s(zSnippet)</span></li>
819 }
820 db_finalize(&q);
821 if( nRow ){
822 @ </ol>
823 }
824
--- src/search.c
+++ src/search.c
@@ -744,11 +744,11 @@
744 "INSERT INTO x(label,url,score,date,snip) "
745 " SELECT ftsdocs.label,"
746 " ftsdocs.url,"
747 " rank(matchinfo(ftsidx,'pcsx')),"
748 " datetime(ftsdocs.mtime),"
749 " snippet(ftsidx,'<mark>','</mark>',' ... ')"
750 " FROM ftsidx, ftsdocs"
751 " WHERE ftsidx MATCH %Q"
752 " AND ftsdocs.rowid=ftsidx.docid",
753 zPattern
754 );
@@ -772,10 +772,59 @@
772 db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/);
773 #if SEARCH_DEBUG_RANK
774 db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
775 #endif
776 }
777
/*
** Check whether z[] begins with a highlighted search term of the form
** "<mark>TEXT</mark>", where TEXT is a (possibly empty) run of characters
** accepted by fossil_isalnum() -- that is, no white-space or punctuation.
**
** If z[] begins with such a mark, return the total length of the mark
** in bytes, including the opening and closing tags.  Otherwise return 0.
*/
static int isSnippetMark(const char *z){
  static const char zOpen[] = "<mark>";
  static const char zClose[] = "</mark>";
  const int nOpen = (int)(sizeof(zOpen)-1);
  const int nClose = (int)(sizeof(zClose)-1);
  int iEnd;          /* Offset of the first byte past TEXT */

  if( strncmp(z, zOpen, nOpen) ) return 0;

  /* Step over TEXT.  The scan stops at the NUL terminator at the latest,
  ** so the comparison below never starts beyond the end of the string. */
  for(iEnd=nOpen; fossil_isalnum(z[iEnd]); iEnd++){}

  if( strncmp(&z[iEnd], zClose, nClose)==0 ){
    return iEnd + nClose;
  }
  return 0;
}
791
/*
** Return a copy of zSnip (in memory obtained from fossil_malloc()) in
** which every "<" character that does not begin a "<mark>TEXT</mark>"
** sequence recognized by isSnippetMark() is converted into "&lt;".
** This is similar to htmlize() except that <mark> and </mark> are
** preserved.  Only "<" is escaped; every other character is copied
** through unchanged.
**
** zSnip must be a NUL-terminated string.
*/
static char *cleanSnippet(const char *zSnip){
  int i;             /* Input length on the first pass, then output index */
  int n = 0;         /* Number of "<" in the input, then length of a mark */
  char *z;           /* The escaped copy under construction */

  for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++;

  /* Worst case, every "<" is a stray one and grows from 1 byte to the
  ** 4 bytes of "&lt;", which is 3 additional bytes per "<".  Reserving
  ** only one additional byte per "<" would let the memcpy() of "&lt;"
  ** below write past the end of the allocation. */
  z = fossil_malloc( i + n*3 + 1 );

  i = 0;
  while( zSnip[0] ){
    if( zSnip[0]=='<' ){
      n = isSnippetMark(zSnip);
      if( n ){
        /* A genuine highlight: copy the whole <mark>TEXT</mark> verbatim */
        memcpy(&z[i], zSnip, n);
        zSnip += n;
        i += n;
        continue;
      }else{
        /* A stray "<" from the document text: neutralize it */
        memcpy(&z[i], "&lt;", 4);
        i += 4;
        zSnip++;
      }
    }else{
      z[i++] = zSnip[0];
      zSnip++;
    }
  }
  z[i] = 0;
  return z;
}
826
827
828 /*
829 ** This routine generates web-page output for a search operation.
830 ** Other web-pages can invoke this routine to add search results
@@ -813,11 +862,11 @@
862 if( nRow==0 ){
863 @ <ol>
864 }
865 nRow++;
866 @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a><br>
867 @ <span class='snippet'>%z(cleanSnippet(zSnippet))</span></li>
868 }
869 db_finalize(&q);
870 if( nRow ){
871 @ </ol>
872 }
873

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button