Fossil SCM
Improved ranking function for the indexed search.
Commit
91da57d911c1485d9aff5095cfbb62becdd6d36d
Parent
1bad221ecb55897…
1 file changed
+48
-1
+48
-1
| --- src/search.c | ||
| +++ src/search.c | ||
| @@ -677,10 +677,52 @@ | ||
| 677 | 677 | " FROM ticket" |
| 678 | 678 | " WHERE search_match(stext('t',tkt_id,NULL));" |
| 679 | 679 | ); |
| 680 | 680 | } |
| 681 | 681 | } |
| 682 | + | |
| 683 | +/* | |
| 684 | +** Implementation of the rank() function used with rank(matchinfo(*,'pcsx')). | |
| 685 | +*/ | |
| 686 | +static void search_rank_sqlfunc( | |
| 687 | + sqlite3_context *context, | |
| 688 | + int argc, | |
| 689 | + sqlite3_value **argv | |
| 690 | +){ | |
| 691 | + const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); | |
| 692 | + int nVal = sqlite3_value_bytes(argv[0])/4; | |
| 693 | + int nTerm; /* Number of search terms in the query */ | |
| 694 | + int i; /* Loop counter */ | |
| 695 | + double r = 1.0; /* Score */ | |
| 696 | + | |
| 697 | + if( nVal<6 ) return; | |
| 698 | + if( aVal[1]!=1 ) return; | |
| 699 | + nTerm = aVal[0]; | |
| 700 | + r *= 1<<((30*(aVal[2]-1))/nTerm); | |
| 701 | + for(i=1; i<=nTerm; i++){ | |
| 702 | + int hits_this_row = aVal[3*i]; | |
| 703 | + int hits_all_rows = aVal[3*i+1]; | |
| 704 | + int rows_with_hit = aVal[3*i+2]; | |
| 705 | + double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit; | |
| 706 | + r *= hits_this_row/avg_hits_per_row; | |
| 707 | + } | |
| 708 | +#define SEARCH_DEBUG_RANK 0 | |
| 709 | +#if SEARCH_DEBUG_RANK | |
| 710 | + { | |
| 711 | + Blob x; | |
| 712 | + blob_init(&x,0,0); | |
| 713 | + blob_appendf(&x,"%08x", (int)r); | |
| 714 | + for(i=0; i<nVal; i++){ | |
| 715 | + blob_appendf(&x," %d", aVal[i]); | |
| 716 | + } | |
| 717 | + blob_appendf(&x," r=%g", r); | |
| 718 | + sqlite3_result_text(context, blob_str(&x), -1, fossil_free); | |
| 719 | + } | |
| 720 | +#else | |
| 721 | + sqlite3_result_double(context, r); | |
| 722 | +#endif | |
| 723 | +} | |
| 682 | 724 | |
| 683 | 725 | /* |
| 684 | 726 | ** When this routine is called, there already exists a table |
| 685 | 727 | ** |
| 686 | 728 | ** x(label,url,score,date,snip). |
| @@ -692,22 +734,27 @@ | ||
| 692 | 734 | */ |
| 693 | 735 | static void search_indexed( |
| 694 | 736 | const char *zPattern, /* The query pattern */ |
| 695 | 737 | unsigned int srchFlags /* What to search over */ |
| 696 | 738 | ){ |
| 739 | + sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0, | |
| 740 | + search_rank_sqlfunc, 0, 0); | |
| 697 | 741 | db_multi_exec( |
| 698 | 742 | "INSERT INTO x(label,url,score,date,snip) " |
| 699 | 743 | " SELECT ftsdocs.label," |
| 700 | 744 | " ftsdocs.url," |
| 701 | - " 1," /*FIX ME*/ | |
| 745 | + " rank(matchinfo(ftsidx,'pcsx'))," | |
| 702 | 746 | " datetime(ftsdocs.mtime)," |
| 703 | 747 | " snippet(ftsidx)" |
| 704 | 748 | " FROM ftsidx, ftsdocs" |
| 705 | 749 | " WHERE ftsidx MATCH %Q" |
| 706 | 750 | " AND ftsdocs.rowid=ftsidx.docid", |
| 707 | 751 | zPattern |
| 708 | 752 | ); |
| 753 | +#if SEARCH_DEBUG_RANK | |
| 754 | + db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); | |
| 755 | +#endif | |
| 709 | 756 | } |
| 710 | 757 | |
| 711 | 758 | |
| 712 | 759 | /* |
| 713 | 760 | ** This routine generates web-page output for a search operation. |
| 714 | 761 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -677,10 +677,52 @@ | |
| 677 | " FROM ticket" |
| 678 | " WHERE search_match(stext('t',tkt_id,NULL));" |
| 679 | ); |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | /* |
| 684 | ** When this routine is called, there already exists a table |
| 685 | ** |
| 686 | ** x(label,url,score,date,snip). |
| @@ -692,22 +734,27 @@ | |
| 692 | */ |
| 693 | static void search_indexed( |
| 694 | const char *zPattern, /* The query pattern */ |
| 695 | unsigned int srchFlags /* What to search over */ |
| 696 | ){ |
| 697 | db_multi_exec( |
| 698 | "INSERT INTO x(label,url,score,date,snip) " |
| 699 | " SELECT ftsdocs.label," |
| 700 | " ftsdocs.url," |
| 701 | " 1," /*FIX ME*/ |
| 702 | " datetime(ftsdocs.mtime)," |
| 703 | " snippet(ftsidx)" |
| 704 | " FROM ftsidx, ftsdocs" |
| 705 | " WHERE ftsidx MATCH %Q" |
| 706 | " AND ftsdocs.rowid=ftsidx.docid", |
| 707 | zPattern |
| 708 | ); |
| 709 | } |
| 710 | |
| 711 | |
| 712 | /* |
| 713 | ** This routine generates web-page output for a search operation. |
| 714 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -677,10 +677,52 @@ | |
| 677 | " FROM ticket" |
| 678 | " WHERE search_match(stext('t',tkt_id,NULL));" |
| 679 | ); |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | /* |
| 684 | ** Implementation of the rank() function used with rank(matchinfo(*,'pcsx')). |
| 685 | */ |
| 686 | static void search_rank_sqlfunc( |
| 687 | sqlite3_context *context, |
| 688 | int argc, |
| 689 | sqlite3_value **argv |
| 690 | ){ |
| 691 | const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); |
| 692 | int nVal = sqlite3_value_bytes(argv[0])/4; |
| 693 | int nTerm; /* Number of search terms in the query */ |
| 694 | int i; /* Loop counter */ |
| 695 | double r = 1.0; /* Score */ |
| 696 | |
| 697 | if( nVal<6 ) return; |
| 698 | if( aVal[1]!=1 ) return; |
| 699 | nTerm = aVal[0]; |
| 700 | r *= 1<<((30*(aVal[2]-1))/nTerm); |
| 701 | for(i=1; i<=nTerm; i++){ |
| 702 | int hits_this_row = aVal[3*i]; |
| 703 | int hits_all_rows = aVal[3*i+1]; |
| 704 | int rows_with_hit = aVal[3*i+2]; |
| 705 | double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit; |
| 706 | r *= hits_this_row/avg_hits_per_row; |
| 707 | } |
| 708 | #define SEARCH_DEBUG_RANK 0 |
| 709 | #if SEARCH_DEBUG_RANK |
| 710 | { |
| 711 | Blob x; |
| 712 | blob_init(&x,0,0); |
| 713 | blob_appendf(&x,"%08x", (int)r); |
| 714 | for(i=0; i<nVal; i++){ |
| 715 | blob_appendf(&x," %d", aVal[i]); |
| 716 | } |
| 717 | blob_appendf(&x," r=%g", r); |
| 718 | sqlite3_result_text(context, blob_str(&x), -1, fossil_free); |
| 719 | } |
| 720 | #else |
| 721 | sqlite3_result_double(context, r); |
| 722 | #endif |
| 723 | } |
| 724 | |
| 725 | /* |
| 726 | ** When this routine is called, there already exists a table |
| 727 | ** |
| 728 | ** x(label,url,score,date,snip). |
| @@ -692,22 +734,27 @@ | |
| 734 | */ |
| 735 | static void search_indexed( |
| 736 | const char *zPattern, /* The query pattern */ |
| 737 | unsigned int srchFlags /* What to search over */ |
| 738 | ){ |
| 739 | sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0, |
| 740 | search_rank_sqlfunc, 0, 0); |
| 741 | db_multi_exec( |
| 742 | "INSERT INTO x(label,url,score,date,snip) " |
| 743 | " SELECT ftsdocs.label," |
| 744 | " ftsdocs.url," |
| 745 | " rank(matchinfo(ftsidx,'pcsx'))," |
| 746 | " datetime(ftsdocs.mtime)," |
| 747 | " snippet(ftsidx)" |
| 748 | " FROM ftsidx, ftsdocs" |
| 749 | " WHERE ftsidx MATCH %Q" |
| 750 | " AND ftsdocs.rowid=ftsidx.docid", |
| 751 | zPattern |
| 752 | ); |
| 753 | #if SEARCH_DEBUG_RANK |
| 754 | db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); |
| 755 | #endif |
| 756 | } |
| 757 | |
| 758 | |
| 759 | /* |
| 760 | ** This routine generates web-page output for a search operation. |
| 761 |