Fossil SCM

Improved ranking function for the indexed search.

drh 2015-02-03 01:55 UTC indexed-fts
Commit 91da57d911c1485d9aff5095cfbb62becdd6d36d
1 file changed +48 -1
+48 -1
--- src/search.c
+++ src/search.c
@@ -677,10 +677,52 @@
677 677
" FROM ticket"
678 678
" WHERE search_match(stext('t',tkt_id,NULL));"
679 679
);
680 680
}
681 681
}
682
+
683
+/*
684
+** Implementation of the rank() function used with rank(matchinfo(*,'pcsx')).
685
+*/
686
+static void search_rank_sqlfunc(
687
+ sqlite3_context *context,
688
+ int argc,
689
+ sqlite3_value **argv
690
+){
691
+ const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
692
+ int nVal = sqlite3_value_bytes(argv[0])/4;
693
+ int nTerm; /* Number of search terms in the query */
694
+ int i; /* Loop counter */
695
+ double r = 1.0; /* Score */
696
+
697
+ if( nVal<6 ) return;
698
+ if( aVal[1]!=1 ) return;
699
+ nTerm = aVal[0];
700
+ r *= 1<<((30*(aVal[2]-1))/nTerm);
701
+ for(i=1; i<=nTerm; i++){
702
+ int hits_this_row = aVal[3*i];
703
+ int hits_all_rows = aVal[3*i+1];
704
+ int rows_with_hit = aVal[3*i+2];
705
+ double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
706
+ r *= hits_this_row/avg_hits_per_row;
707
+ }
708
+#define SEARCH_DEBUG_RANK 0
709
+#if SEARCH_DEBUG_RANK
710
+ {
711
+ Blob x;
712
+ blob_init(&x,0,0);
713
+ blob_appendf(&x,"%08x", (int)r);
714
+ for(i=0; i<nVal; i++){
715
+ blob_appendf(&x," %d", aVal[i]);
716
+ }
717
+ blob_appendf(&x," r=%g", r);
718
+ sqlite3_result_text(context, blob_str(&x), -1, fossil_free);
719
+ }
720
+#else
721
+ sqlite3_result_double(context, r);
722
+#endif
723
+}
682 724
683 725
/*
684 726
** When this routine is called, there already exists a table
685 727
**
686 728
** x(label,url,score,date,snip).
@@ -692,22 +734,27 @@
692 734
*/
693 735
static void search_indexed(
694 736
const char *zPattern, /* The query pattern */
695 737
unsigned int srchFlags /* What to search over */
696 738
){
739
+ sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0,
740
+ search_rank_sqlfunc, 0, 0);
697 741
db_multi_exec(
698 742
"INSERT INTO x(label,url,score,date,snip) "
699 743
" SELECT ftsdocs.label,"
700 744
" ftsdocs.url,"
701
- " 1," /*FIX ME*/
745
+ " rank(matchinfo(ftsidx,'pcsx')),"
702 746
" datetime(ftsdocs.mtime),"
703 747
" snippet(ftsidx)"
704 748
" FROM ftsidx, ftsdocs"
705 749
" WHERE ftsidx MATCH %Q"
706 750
" AND ftsdocs.rowid=ftsidx.docid",
707 751
zPattern
708 752
);
753
+#if SEARCH_DEBUG_RANK
754
+ db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
755
+#endif
709 756
}
710 757
711 758
712 759
/*
713 760
** This routine generates web-page output for a search operation.
714 761
--- src/search.c
+++ src/search.c
@@ -677,10 +677,52 @@
677 " FROM ticket"
678 " WHERE search_match(stext('t',tkt_id,NULL));"
679 );
680 }
681 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
683 /*
684 ** When this routine is called, there already exists a table
685 **
686 ** x(label,url,score,date,snip).
@@ -692,22 +734,27 @@
692 */
693 static void search_indexed(
694 const char *zPattern, /* The query pattern */
695 unsigned int srchFlags /* What to search over */
696 ){
 
 
697 db_multi_exec(
698 "INSERT INTO x(label,url,score,date,snip) "
699 " SELECT ftsdocs.label,"
700 " ftsdocs.url,"
701 " 1," /*FIX ME*/
702 " datetime(ftsdocs.mtime),"
703 " snippet(ftsidx)"
704 " FROM ftsidx, ftsdocs"
705 " WHERE ftsidx MATCH %Q"
706 " AND ftsdocs.rowid=ftsidx.docid",
707 zPattern
708 );
 
 
 
709 }
710
711
712 /*
713 ** This routine generates web-page output for a search operation.
714
--- src/search.c
+++ src/search.c
@@ -677,10 +677,52 @@
677 " FROM ticket"
678 " WHERE search_match(stext('t',tkt_id,NULL));"
679 );
680 }
681 }
682
683 /*
684 ** Implementation of the rank() function used with rank(matchinfo(*,'pcsx')).
685 */
686 static void search_rank_sqlfunc(
687 sqlite3_context *context,
688 int argc,
689 sqlite3_value **argv
690 ){
691 const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
692 int nVal = sqlite3_value_bytes(argv[0])/4;
693 int nTerm; /* Number of search terms in the query */
694 int i; /* Loop counter */
695 double r = 1.0; /* Score */
696
697 if( nVal<6 ) return;
698 if( aVal[1]!=1 ) return;
699 nTerm = aVal[0];
700 r *= 1<<((30*(aVal[2]-1))/nTerm);
701 for(i=1; i<=nTerm; i++){
702 int hits_this_row = aVal[3*i];
703 int hits_all_rows = aVal[3*i+1];
704 int rows_with_hit = aVal[3*i+2];
705 double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
706 r *= hits_this_row/avg_hits_per_row;
707 }
708 #define SEARCH_DEBUG_RANK 0
709 #if SEARCH_DEBUG_RANK
710 {
711 Blob x;
712 blob_init(&x,0,0);
713 blob_appendf(&x,"%08x", (int)r);
714 for(i=0; i<nVal; i++){
715 blob_appendf(&x," %d", aVal[i]);
716 }
717 blob_appendf(&x," r=%g", r);
718 sqlite3_result_text(context, blob_str(&x), -1, fossil_free);
719 }
720 #else
721 sqlite3_result_double(context, r);
722 #endif
723 }
724
725 /*
726 ** When this routine is called, there already exists a table
727 **
728 ** x(label,url,score,date,snip).
@@ -692,22 +734,27 @@
734 */
735 static void search_indexed(
736 const char *zPattern, /* The query pattern */
737 unsigned int srchFlags /* What to search over */
738 ){
739 sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0,
740 search_rank_sqlfunc, 0, 0);
741 db_multi_exec(
742 "INSERT INTO x(label,url,score,date,snip) "
743 " SELECT ftsdocs.label,"
744 " ftsdocs.url,"
745 " rank(matchinfo(ftsidx,'pcsx')),"
746 " datetime(ftsdocs.mtime),"
747 " snippet(ftsidx)"
748 " FROM ftsidx, ftsdocs"
749 " WHERE ftsidx MATCH %Q"
750 " AND ftsdocs.rowid=ftsidx.docid",
751 zPattern
752 );
753 #if SEARCH_DEBUG_RANK
754 db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
755 #endif
756 }
757
758
759 /*
760 ** This routine generates web-page output for a search operation.
761

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button