Fossil SCM

Enabled indexed search with separate title and body and with the option to use the Porter stemmer.

drh 2015-02-14 00:37 UTC search-enhancements
Commit 71295a98b7bce92bb20158f529021ef8bcd020dc
3 files changed: src/db.c (+2 -7), src/search.c (+61 -34), src/setup.c (+2)
+2 -7
--- src/db.c
+++ src/db.c
@@ -65,14 +65,10 @@
6565
*/
6666
static void db_err(const char *zFormat, ...){
6767
va_list ap;
6868
char *z;
6969
int rc = 1;
70
- static const char zRebuildMsg[] =
71
- "If you have recently updated your fossil executable, you might\n"
72
- "need to run \"fossil all rebuild\" to bring the repository\n"
73
- "schemas up to date.\n";
7470
va_start(ap, zFormat);
7571
z = vmprintf(zFormat, ap);
7672
va_end(ap);
7773
#ifdef FOSSIL_ENABLE_JSON
7874
if( g.json.isJsonMode ){
@@ -88,15 +84,14 @@
8884
@ error Database\serror:\s%F(z)
8985
cgi_reply();
9086
}
9187
else if( g.cgiOutput ){
9288
g.cgiOutput = 0;
93
- cgi_printf("<h1>Database Error</h1>\n"
94
- "<pre>%h</pre>\n<p>%s</p>\n", z, zRebuildMsg);
89
+ cgi_printf("<h1>Database Error</h1>\n<p>%h</p>\n", z);
9590
cgi_reply();
9691
}else{
97
- fprintf(stderr, "%s: %s\n\n%s", g.argv[0], z, zRebuildMsg);
92
+ fprintf(stderr, "%s: %s\n", g.argv[0], z);
9893
}
9994
free(z);
10095
db_force_rollback();
10196
fossil_exit(rc);
10297
}
10398
--- src/db.c
+++ src/db.c
@@ -65,14 +65,10 @@
65 */
66 static void db_err(const char *zFormat, ...){
67 va_list ap;
68 char *z;
69 int rc = 1;
70 static const char zRebuildMsg[] =
71 "If you have recently updated your fossil executable, you might\n"
72 "need to run \"fossil all rebuild\" to bring the repository\n"
73 "schemas up to date.\n";
74 va_start(ap, zFormat);
75 z = vmprintf(zFormat, ap);
76 va_end(ap);
77 #ifdef FOSSIL_ENABLE_JSON
78 if( g.json.isJsonMode ){
@@ -88,15 +84,14 @@
88 @ error Database\serror:\s%F(z)
89 cgi_reply();
90 }
91 else if( g.cgiOutput ){
92 g.cgiOutput = 0;
93 cgi_printf("<h1>Database Error</h1>\n"
94 "<pre>%h</pre>\n<p>%s</p>\n", z, zRebuildMsg);
95 cgi_reply();
96 }else{
97 fprintf(stderr, "%s: %s\n\n%s", g.argv[0], z, zRebuildMsg);
98 }
99 free(z);
100 db_force_rollback();
101 fossil_exit(rc);
102 }
103
--- src/db.c
+++ src/db.c
@@ -65,14 +65,10 @@
65 */
66 static void db_err(const char *zFormat, ...){
67 va_list ap;
68 char *z;
69 int rc = 1;
 
 
 
 
70 va_start(ap, zFormat);
71 z = vmprintf(zFormat, ap);
72 va_end(ap);
73 #ifdef FOSSIL_ENABLE_JSON
74 if( g.json.isJsonMode ){
@@ -88,15 +84,14 @@
84 @ error Database\serror:\s%F(z)
85 cgi_reply();
86 }
87 else if( g.cgiOutput ){
88 g.cgiOutput = 0;
89 cgi_printf("<h1>Database Error</h1>\n<p>%h</p>\n", z);
 
90 cgi_reply();
91 }else{
92 fprintf(stderr, "%s: %s\n", g.argv[0], z);
93 }
94 free(z);
95 db_force_rollback();
96 fossil_exit(rc);
97 }
98
+61 -34
--- src/search.c
+++ src/search.c
@@ -730,24 +730,32 @@
730730
int argc,
731731
sqlite3_value **argv
732732
){
733733
const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
734734
int nVal = sqlite3_value_bytes(argv[0])/4;
735
+ int nCol; /* Number of columns in the index */
735736
int nTerm; /* Number of search terms in the query */
736
- int i; /* Loop counter */
737
+ int i, j; /* Loop counter */
737738
double r = 1.0; /* Score */
739
+ const unsigned *aX, *aS;
738740
739
- if( nVal<6 ) return;
740
- if( aVal[1]!=1 ) return;
741
+ if( nVal<2 ) return;
741742
nTerm = aVal[0];
742
- r *= 1<<((30*(aVal[2]-1))/nTerm);
743
- for(i=1; i<=nTerm; i++){
744
- int hits_this_row = aVal[3*i];
745
- int hits_all_rows = aVal[3*i+1];
746
- int rows_with_hit = aVal[3*i+2];
747
- double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
748
- r *= hits_this_row/avg_hits_per_row;
743
+ nCol = aVal[1];
744
+ if( nVal<2+3*nCol*nTerm+4*nCol ) return;
745
+ aS = aVal+2;
746
+ aX = aS+nCol;
747
+ for(j=0; j<nCol; j++){
748
+ r *= 1<<((30*(aS[j]-1))/nTerm);
749
+ for(i=0; i<nTerm; i++){
750
+ int hits_this_row = aX[j + i*nCol];
751
+ int hits_all_rows = aX[j + i*nCol + 1];
752
+ int rows_with_hit = aX[j + i*nCol + 2];
753
+ double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
754
+ r *= hits_this_row/avg_hits_per_row;
755
+ }
756
+ r *= 2.0;
749757
}
750758
#define SEARCH_DEBUG_RANK 0
751759
#if SEARCH_DEBUG_RANK
752760
{
753761
Blob x;
@@ -1294,20 +1302,21 @@
12941302
@ name TEXT, -- Additional document description
12951303
@ idxed BOOLEAN, -- True if currently in the index
12961304
@ label TEXT, -- Label to print on search results
12971305
@ url TEXT, -- URL to access this document
12981306
@ mtime DATE, -- Date when document created
1307
+@ bx TEXT, -- Temporary "body" content cache
12991308
@ UNIQUE(type,rid)
13001309
@ );
13011310
@ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
13021311
@ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w';
13031312
@ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS
13041313
@ SELECT rowid, type, rid, name, idxed, label, url, mtime,
1305
-@ stext(type,rid,name) AS 'stext'
1314
+@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body'
13061315
@ FROM ftsdocs;
13071316
@ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx
1308
-@ USING fts4(content="ftscontent", stext);
1317
+@ USING fts4(content="ftscontent", title, body%s);
13091318
;
13101319
static const char zFtsDrop[] =
13111320
@ DROP TABLE IF EXISTS "%w".ftsidx;
13121321
@ DROP VIEW IF EXISTS "%w".ftscontent;
13131322
@ DROP TABLE IF EXISTS "%w".ftsdocs;
@@ -1317,13 +1326,15 @@
13171326
** Create or drop the tables associated with a full-text index.
13181327
*/
13191328
static int searchIdxExists = -1;
13201329
void search_create_index(void){
13211330
const char *zDb = db_name("repository");
1331
+ int useStemmer = db_get_boolean("search-stemmer",0);
1332
+ const char *zExtra = useStemmer ? ",tokenize=porter" : "";
13221333
search_sql_setup(g.db);
1323
- db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w"*/,
1324
- zDb, zDb, zDb, zDb, zDb);
1334
+ db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/,
1335
+ zDb, zDb, zDb, zDb, zDb, zExtra/*safe-for-%s*/);
13251336
searchIdxExists = 1;
13261337
}
13271338
void search_drop_index(void){
13281339
const char *zDb = db_name("repository");
13291340
db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb);
@@ -1441,34 +1452,38 @@
14411452
db_multi_exec(
14421453
"DELETE FROM ftsdocs WHERE type='d'"
14431454
" AND rid NOT IN (SELECT rid FROM current_docs)"
14441455
);
14451456
db_multi_exec(
1446
- "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,url,mtime)"
1457
+ "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)"
14471458
" SELECT 'd', rid, name, 0,"
1448
- " printf('Document: %%s',name),"
1459
+ " 'Document: '||title('d',rid,name),"
1460
+ " body('d',rid,name),"
14491461
" printf('/doc/%q/%%s',urlencode(name)),"
14501462
" %.17g"
14511463
" FROM current_docs",
14521464
zBrUuid, rTime
14531465
);
14541466
db_multi_exec(
1455
- "INSERT INTO ftsidx(docid,stext)"
1456
- " SELECT rowid, stext FROM ftscontent WHERE type='d' AND NOT idxed"
1467
+ "INSERT INTO ftsidx(docid,title,body)"
1468
+ " SELECT rowid, name, bx FROM ftsdocs WHERE type='d' AND NOT idxed"
14571469
);
14581470
db_multi_exec(
1459
- "UPDATE ftsdocs SET idxed=1 WHERE type='d' AND NOT idxed"
1471
+ "UPDATE ftsdocs SET"
1472
+ " idxed=1,"
1473
+ " bx=NULL"
1474
+ " WHERE type='d' AND NOT idxed"
14601475
);
14611476
}
14621477
14631478
/*
14641479
** Deal with all of the unindexed 'c' terms in FTSDOCS
14651480
*/
14661481
static void search_update_checkin_index(void){
14671482
db_multi_exec(
1468
- "INSERT INTO ftsidx(docid,stext)"
1469
- " SELECT rowid, stext('c',rid,NULL) FROM ftsdocs"
1483
+ "INSERT INTO ftsidx(docid,title,body)"
1484
+ " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
14701485
" WHERE type='c' AND NOT idxed;"
14711486
);
14721487
db_multi_exec(
14731488
"REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
14741489
" SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL,"
@@ -1485,19 +1500,20 @@
14851500
/*
14861501
** Deal with all of the unindexed 't' terms in FTSDOCS
14871502
*/
14881503
static void search_update_ticket_index(void){
14891504
db_multi_exec(
1490
- "INSERT INTO ftsidx(docid,stext)"
1491
- " SELECT rowid, stext('t',rid,NULL) FROM ftsdocs"
1505
+ "INSERT INTO ftsidx(docid,title,body)"
1506
+ " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs"
14921507
" WHERE type='t' AND NOT idxed;"
14931508
);
14941509
if( db_changes()==0 ) return;
14951510
db_multi_exec(
14961511
"REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
14971512
" SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL,"
1498
- " printf('Ticket [%%.16s] on %%s',tkt_uuid,datetime(tkt_mtime)),"
1513
+ " printf('Ticket: %%s (%%s)',title('t',tkt_id,null),"
1514
+ " datetime(tkt_mtime)),"
14991515
" printf('/tktview/%%.20s',tkt_uuid),"
15001516
" tkt_mtime"
15011517
" FROM ftsdocs, ticket"
15021518
" WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
15031519
" AND ticket.tkt_id=ftsdocs.rid"
@@ -1507,12 +1523,12 @@
15071523
/*
15081524
** Deal with all of the unindexed 'w' terms in FTSDOCS
15091525
*/
15101526
static void search_update_wiki_index(void){
15111527
db_multi_exec(
1512
- "INSERT INTO ftsidx(docid,stext)"
1513
- " SELECT rowid, stext('w',rid,NULL) FROM ftsdocs"
1528
+ "INSERT INTO ftsidx(docid,title,body)"
1529
+ " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs"
15141530
" WHERE type='w' AND NOT idxed;"
15151531
);
15161532
if( db_changes()==0 ) return;
15171533
db_multi_exec(
15181534
"REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
@@ -1565,19 +1581,22 @@
15651581
** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT?
15661582
**
15671583
** The "fossil fts-config" command configures the full-text search capabilities
15681584
** of the repository. Subcommands:
15691585
**
1570
-** reindex Rebuild the search index. Create it if it does
1571
-** not already exist
1586
+** reindex Rebuild the search index. This is a no-op if
1587
+** index search is disabled
15721588
**
15731589
** index (on|off) Turn the search index on or off
15741590
**
15751591
** enable cdtw Enable various kinds of search. c=Check-ins,
15761592
** d=Documents, t=Tickets, w=Wiki.
15771593
**
15781594
** disable cdtw Disable versious kinds of search
1595
+**
1596
+** stemmer (on|off) Turn the Porter stemmer on or off for indexed
1597
+** search. (Unindexed search is never stemmed.)
15791598
**
15801599
** The current search settings are displayed after any changes are applied.
15811600
** Run this command with no arguments to simply see the settings.
15821601
*/
15831602
void test_fts_cmd(void){
@@ -1584,16 +1603,17 @@
15841603
static const struct { int iCmd; const char *z; } aCmd[] = {
15851604
{ 1, "reindex" },
15861605
{ 2, "index" },
15871606
{ 3, "disable" },
15881607
{ 4, "enable" },
1608
+ { 5, "stemmer" },
15891609
};
15901610
static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
1591
- { "search-ckin", "check-in search:", "c" },
1592
- { "search-doc", "document search:", "d" },
1593
- { "search-tkt", "ticket search:", "t" },
1594
- { "search-wiki", "wiki search:", "w" },
1611
+ { "search-ckin", "check-in search:", "c" },
1612
+ { "search-doc", "document search:", "d" },
1613
+ { "search-tkt", "ticket search:", "t" },
1614
+ { "search-wiki", "wiki search:", "w" },
15951615
};
15961616
char *zSubCmd;
15971617
int i, j, n;
15981618
int iCmd = 0;
15991619
int iAction = 0;
@@ -1613,11 +1633,11 @@
16131633
return;
16141634
}
16151635
iCmd = aCmd[i].iCmd;
16161636
}
16171637
if( iCmd==1 ){
1618
- iAction = 2;
1638
+ if( search_index_exists() ) iAction = 2;
16191639
}
16201640
if( iCmd==2 ){
16211641
if( g.argc<3 ) usage("index (on|off)");
16221642
iAction = 1 + is_truth(g.argv[3]);
16231643
}
@@ -1624,18 +1644,23 @@
16241644
db_begin_transaction();
16251645
16261646
/* Adjust search settings */
16271647
if( iCmd==3 || iCmd==4 ){
16281648
const char *zCtrl;
1629
- if( g.argc<4 ) usage("enable STRING");
1649
+ if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd));
16301650
zCtrl = g.argv[3];
16311651
for(j=0; j<ArraySize(aSetng); j++){
16321652
if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){
16331653
db_set_int(aSetng[j].zSetting, iCmd-3, 0);
16341654
}
16351655
}
16361656
}
1657
+ if( iCmd==5 ){
1658
+ if( g.argc<4 ) usage("porter ON/OFF");
1659
+ db_set_int("search-stemmer", is_truth(g.argv[3]), 0);
1660
+ }
1661
+
16371662
16381663
/* destroy or rebuild the index, if requested */
16391664
if( iAction>=1 ){
16401665
search_drop_index();
16411666
}
@@ -1646,14 +1671,16 @@
16461671
/* Always show the status before ending */
16471672
for(i=0; i<ArraySize(aSetng); i++){
16481673
fossil_print("%-16s %s\n", aSetng[i].zName,
16491674
db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
16501675
}
1676
+ fossil_print("%-16s %s\n", "Porter stemmer:",
1677
+ db_get_boolean("search-stemmer",0) ? "on" : "off");
16511678
if( search_index_exists() ){
16521679
fossil_print("%-16s enabled\n", "full-text index:");
16531680
fossil_print("%-16s %d\n", "documents:",
16541681
db_int(0, "SELECT count(*) FROM ftsdocs"));
16551682
}else{
16561683
fossil_print("%-16s disabled\n", "full-text index:");
16571684
}
16581685
db_end_transaction(0);
16591686
}
16601687
--- src/search.c
+++ src/search.c
@@ -730,24 +730,32 @@
730 int argc,
731 sqlite3_value **argv
732 ){
733 const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
734 int nVal = sqlite3_value_bytes(argv[0])/4;
 
735 int nTerm; /* Number of search terms in the query */
736 int i; /* Loop counter */
737 double r = 1.0; /* Score */
 
738
739 if( nVal<6 ) return;
740 if( aVal[1]!=1 ) return;
741 nTerm = aVal[0];
742 r *= 1<<((30*(aVal[2]-1))/nTerm);
743 for(i=1; i<=nTerm; i++){
744 int hits_this_row = aVal[3*i];
745 int hits_all_rows = aVal[3*i+1];
746 int rows_with_hit = aVal[3*i+2];
747 double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
748 r *= hits_this_row/avg_hits_per_row;
 
 
 
 
 
 
 
749 }
750 #define SEARCH_DEBUG_RANK 0
751 #if SEARCH_DEBUG_RANK
752 {
753 Blob x;
@@ -1294,20 +1302,21 @@
1294 @ name TEXT, -- Additional document description
1295 @ idxed BOOLEAN, -- True if currently in the index
1296 @ label TEXT, -- Label to print on search results
1297 @ url TEXT, -- URL to access this document
1298 @ mtime DATE, -- Date when document created
 
1299 @ UNIQUE(type,rid)
1300 @ );
1301 @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
1302 @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w';
1303 @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS
1304 @ SELECT rowid, type, rid, name, idxed, label, url, mtime,
1305 @ stext(type,rid,name) AS 'stext'
1306 @ FROM ftsdocs;
1307 @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx
1308 @ USING fts4(content="ftscontent", stext);
1309 ;
1310 static const char zFtsDrop[] =
1311 @ DROP TABLE IF EXISTS "%w".ftsidx;
1312 @ DROP VIEW IF EXISTS "%w".ftscontent;
1313 @ DROP TABLE IF EXISTS "%w".ftsdocs;
@@ -1317,13 +1326,15 @@
1317 ** Create or drop the tables associated with a full-text index.
1318 */
1319 static int searchIdxExists = -1;
1320 void search_create_index(void){
1321 const char *zDb = db_name("repository");
 
 
1322 search_sql_setup(g.db);
1323 db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w"*/,
1324 zDb, zDb, zDb, zDb, zDb);
1325 searchIdxExists = 1;
1326 }
1327 void search_drop_index(void){
1328 const char *zDb = db_name("repository");
1329 db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb);
@@ -1441,34 +1452,38 @@
1441 db_multi_exec(
1442 "DELETE FROM ftsdocs WHERE type='d'"
1443 " AND rid NOT IN (SELECT rid FROM current_docs)"
1444 );
1445 db_multi_exec(
1446 "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,url,mtime)"
1447 " SELECT 'd', rid, name, 0,"
1448 " printf('Document: %%s',name),"
 
1449 " printf('/doc/%q/%%s',urlencode(name)),"
1450 " %.17g"
1451 " FROM current_docs",
1452 zBrUuid, rTime
1453 );
1454 db_multi_exec(
1455 "INSERT INTO ftsidx(docid,stext)"
1456 " SELECT rowid, stext FROM ftscontent WHERE type='d' AND NOT idxed"
1457 );
1458 db_multi_exec(
1459 "UPDATE ftsdocs SET idxed=1 WHERE type='d' AND NOT idxed"
 
 
 
1460 );
1461 }
1462
1463 /*
1464 ** Deal with all of the unindexed 'c' terms in FTSDOCS
1465 */
1466 static void search_update_checkin_index(void){
1467 db_multi_exec(
1468 "INSERT INTO ftsidx(docid,stext)"
1469 " SELECT rowid, stext('c',rid,NULL) FROM ftsdocs"
1470 " WHERE type='c' AND NOT idxed;"
1471 );
1472 db_multi_exec(
1473 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
1474 " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL,"
@@ -1485,19 +1500,20 @@
1485 /*
1486 ** Deal with all of the unindexed 't' terms in FTSDOCS
1487 */
1488 static void search_update_ticket_index(void){
1489 db_multi_exec(
1490 "INSERT INTO ftsidx(docid,stext)"
1491 " SELECT rowid, stext('t',rid,NULL) FROM ftsdocs"
1492 " WHERE type='t' AND NOT idxed;"
1493 );
1494 if( db_changes()==0 ) return;
1495 db_multi_exec(
1496 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
1497 " SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL,"
1498 " printf('Ticket [%%.16s] on %%s',tkt_uuid,datetime(tkt_mtime)),"
 
1499 " printf('/tktview/%%.20s',tkt_uuid),"
1500 " tkt_mtime"
1501 " FROM ftsdocs, ticket"
1502 " WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
1503 " AND ticket.tkt_id=ftsdocs.rid"
@@ -1507,12 +1523,12 @@
1507 /*
1508 ** Deal with all of the unindexed 'w' terms in FTSDOCS
1509 */
1510 static void search_update_wiki_index(void){
1511 db_multi_exec(
1512 "INSERT INTO ftsidx(docid,stext)"
1513 " SELECT rowid, stext('w',rid,NULL) FROM ftsdocs"
1514 " WHERE type='w' AND NOT idxed;"
1515 );
1516 if( db_changes()==0 ) return;
1517 db_multi_exec(
1518 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
@@ -1565,19 +1581,22 @@
1565 ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT?
1566 **
1567 ** The "fossil fts-config" command configures the full-text search capabilities
1568 ** of the repository. Subcommands:
1569 **
1570 ** reindex Rebuild the search index. Create it if it does
1571 ** not already exist
1572 **
1573 ** index (on|off) Turn the search index on or off
1574 **
1575 ** enable cdtw Enable various kinds of search. c=Check-ins,
1576 ** d=Documents, t=Tickets, w=Wiki.
1577 **
1578 ** disable cdtw Disable versious kinds of search
 
 
 
1579 **
1580 ** The current search settings are displayed after any changes are applied.
1581 ** Run this command with no arguments to simply see the settings.
1582 */
1583 void test_fts_cmd(void){
@@ -1584,16 +1603,17 @@
1584 static const struct { int iCmd; const char *z; } aCmd[] = {
1585 { 1, "reindex" },
1586 { 2, "index" },
1587 { 3, "disable" },
1588 { 4, "enable" },
 
1589 };
1590 static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
1591 { "search-ckin", "check-in search:", "c" },
1592 { "search-doc", "document search:", "d" },
1593 { "search-tkt", "ticket search:", "t" },
1594 { "search-wiki", "wiki search:", "w" },
1595 };
1596 char *zSubCmd;
1597 int i, j, n;
1598 int iCmd = 0;
1599 int iAction = 0;
@@ -1613,11 +1633,11 @@
1613 return;
1614 }
1615 iCmd = aCmd[i].iCmd;
1616 }
1617 if( iCmd==1 ){
1618 iAction = 2;
1619 }
1620 if( iCmd==2 ){
1621 if( g.argc<3 ) usage("index (on|off)");
1622 iAction = 1 + is_truth(g.argv[3]);
1623 }
@@ -1624,18 +1644,23 @@
1624 db_begin_transaction();
1625
1626 /* Adjust search settings */
1627 if( iCmd==3 || iCmd==4 ){
1628 const char *zCtrl;
1629 if( g.argc<4 ) usage("enable STRING");
1630 zCtrl = g.argv[3];
1631 for(j=0; j<ArraySize(aSetng); j++){
1632 if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){
1633 db_set_int(aSetng[j].zSetting, iCmd-3, 0);
1634 }
1635 }
1636 }
 
 
 
 
 
1637
1638 /* destroy or rebuild the index, if requested */
1639 if( iAction>=1 ){
1640 search_drop_index();
1641 }
@@ -1646,14 +1671,16 @@
1646 /* Always show the status before ending */
1647 for(i=0; i<ArraySize(aSetng); i++){
1648 fossil_print("%-16s %s\n", aSetng[i].zName,
1649 db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
1650 }
 
 
1651 if( search_index_exists() ){
1652 fossil_print("%-16s enabled\n", "full-text index:");
1653 fossil_print("%-16s %d\n", "documents:",
1654 db_int(0, "SELECT count(*) FROM ftsdocs"));
1655 }else{
1656 fossil_print("%-16s disabled\n", "full-text index:");
1657 }
1658 db_end_transaction(0);
1659 }
1660
--- src/search.c
+++ src/search.c
@@ -730,24 +730,32 @@
730 int argc,
731 sqlite3_value **argv
732 ){
733 const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
734 int nVal = sqlite3_value_bytes(argv[0])/4;
735 int nCol; /* Number of columns in the index */
736 int nTerm; /* Number of search terms in the query */
737 int i, j; /* Loop counter */
738 double r = 1.0; /* Score */
739 const unsigned *aX, *aS;
740
741 if( nVal<2 ) return;
 
742 nTerm = aVal[0];
743 nCol = aVal[1];
744 if( nVal<2+3*nCol*nTerm+4*nCol ) return;
745 aS = aVal+2;
746 aX = aS+nCol;
747 for(j=0; j<nCol; j++){
748 r *= 1<<((30*(aS[j]-1))/nTerm);
749 for(i=0; i<nTerm; i++){
750 int hits_this_row = aX[j + i*nCol];
751 int hits_all_rows = aX[j + i*nCol + 1];
752 int rows_with_hit = aX[j + i*nCol + 2];
753 double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
754 r *= hits_this_row/avg_hits_per_row;
755 }
756 r *= 2.0;
757 }
758 #define SEARCH_DEBUG_RANK 0
759 #if SEARCH_DEBUG_RANK
760 {
761 Blob x;
@@ -1294,20 +1302,21 @@
1302 @ name TEXT, -- Additional document description
1303 @ idxed BOOLEAN, -- True if currently in the index
1304 @ label TEXT, -- Label to print on search results
1305 @ url TEXT, -- URL to access this document
1306 @ mtime DATE, -- Date when document created
1307 @ bx TEXT, -- Temporary "body" content cache
1308 @ UNIQUE(type,rid)
1309 @ );
1310 @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
1311 @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w';
1312 @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS
1313 @ SELECT rowid, type, rid, name, idxed, label, url, mtime,
1314 @ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body'
1315 @ FROM ftsdocs;
1316 @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx
1317 @ USING fts4(content="ftscontent", title, body%s);
1318 ;
1319 static const char zFtsDrop[] =
1320 @ DROP TABLE IF EXISTS "%w".ftsidx;
1321 @ DROP VIEW IF EXISTS "%w".ftscontent;
1322 @ DROP TABLE IF EXISTS "%w".ftsdocs;
@@ -1317,13 +1326,15 @@
1326 ** Create or drop the tables associated with a full-text index.
1327 */
1328 static int searchIdxExists = -1;
1329 void search_create_index(void){
1330 const char *zDb = db_name("repository");
1331 int useStemmer = db_get_boolean("search-stemmer",0);
1332 const char *zExtra = useStemmer ? ",tokenize=porter" : "";
1333 search_sql_setup(g.db);
1334 db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/,
1335 zDb, zDb, zDb, zDb, zDb, zExtra/*safe-for-%s*/);
1336 searchIdxExists = 1;
1337 }
1338 void search_drop_index(void){
1339 const char *zDb = db_name("repository");
1340 db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb);
@@ -1441,34 +1452,38 @@
1452 db_multi_exec(
1453 "DELETE FROM ftsdocs WHERE type='d'"
1454 " AND rid NOT IN (SELECT rid FROM current_docs)"
1455 );
1456 db_multi_exec(
1457 "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)"
1458 " SELECT 'd', rid, name, 0,"
1459 " 'Document: '||title('d',rid,name),"
1460 " body('d',rid,name),"
1461 " printf('/doc/%q/%%s',urlencode(name)),"
1462 " %.17g"
1463 " FROM current_docs",
1464 zBrUuid, rTime
1465 );
1466 db_multi_exec(
1467 "INSERT INTO ftsidx(docid,title,body)"
1468 " SELECT rowid, name, bx FROM ftsdocs WHERE type='d' AND NOT idxed"
1469 );
1470 db_multi_exec(
1471 "UPDATE ftsdocs SET"
1472 " idxed=1,"
1473 " bx=NULL"
1474 " WHERE type='d' AND NOT idxed"
1475 );
1476 }
1477
1478 /*
1479 ** Deal with all of the unindexed 'c' terms in FTSDOCS
1480 */
1481 static void search_update_checkin_index(void){
1482 db_multi_exec(
1483 "INSERT INTO ftsidx(docid,title,body)"
1484 " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
1485 " WHERE type='c' AND NOT idxed;"
1486 );
1487 db_multi_exec(
1488 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
1489 " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL,"
@@ -1485,19 +1500,20 @@
1500 /*
1501 ** Deal with all of the unindexed 't' terms in FTSDOCS
1502 */
1503 static void search_update_ticket_index(void){
1504 db_multi_exec(
1505 "INSERT INTO ftsidx(docid,title,body)"
1506 " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs"
1507 " WHERE type='t' AND NOT idxed;"
1508 );
1509 if( db_changes()==0 ) return;
1510 db_multi_exec(
1511 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
1512 " SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL,"
1513 " printf('Ticket: %%s (%%s)',title('t',tkt_id,null),"
1514 " datetime(tkt_mtime)),"
1515 " printf('/tktview/%%.20s',tkt_uuid),"
1516 " tkt_mtime"
1517 " FROM ftsdocs, ticket"
1518 " WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
1519 " AND ticket.tkt_id=ftsdocs.rid"
@@ -1507,12 +1523,12 @@
1523 /*
1524 ** Deal with all of the unindexed 'w' terms in FTSDOCS
1525 */
1526 static void search_update_wiki_index(void){
1527 db_multi_exec(
1528 "INSERT INTO ftsidx(docid,title,body)"
1529 " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs"
1530 " WHERE type='w' AND NOT idxed;"
1531 );
1532 if( db_changes()==0 ) return;
1533 db_multi_exec(
1534 "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
@@ -1565,19 +1581,22 @@
1581 ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT?
1582 **
1583 ** The "fossil fts-config" command configures the full-text search capabilities
1584 ** of the repository. Subcommands:
1585 **
1586 ** reindex Rebuild the search index. This is a no-op if
1587 ** index search is disabled
1588 **
1589 ** index (on|off) Turn the search index on or off
1590 **
1591 ** enable cdtw Enable various kinds of search. c=Check-ins,
1592 ** d=Documents, t=Tickets, w=Wiki.
1593 **
1594 ** disable cdtw Disable versious kinds of search
1595 **
1596 ** stemmer (on|off) Turn the Porter stemmer on or off for indexed
1597 ** search. (Unindexed search is never stemmed.)
1598 **
1599 ** The current search settings are displayed after any changes are applied.
1600 ** Run this command with no arguments to simply see the settings.
1601 */
1602 void test_fts_cmd(void){
@@ -1584,16 +1603,17 @@
1603 static const struct { int iCmd; const char *z; } aCmd[] = {
1604 { 1, "reindex" },
1605 { 2, "index" },
1606 { 3, "disable" },
1607 { 4, "enable" },
1608 { 5, "stemmer" },
1609 };
1610 static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
1611 { "search-ckin", "check-in search:", "c" },
1612 { "search-doc", "document search:", "d" },
1613 { "search-tkt", "ticket search:", "t" },
1614 { "search-wiki", "wiki search:", "w" },
1615 };
1616 char *zSubCmd;
1617 int i, j, n;
1618 int iCmd = 0;
1619 int iAction = 0;
@@ -1613,11 +1633,11 @@
1633 return;
1634 }
1635 iCmd = aCmd[i].iCmd;
1636 }
1637 if( iCmd==1 ){
1638 if( search_index_exists() ) iAction = 2;
1639 }
1640 if( iCmd==2 ){
1641 if( g.argc<3 ) usage("index (on|off)");
1642 iAction = 1 + is_truth(g.argv[3]);
1643 }
@@ -1624,18 +1644,23 @@
1644 db_begin_transaction();
1645
1646 /* Adjust search settings */
1647 if( iCmd==3 || iCmd==4 ){
1648 const char *zCtrl;
1649 if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd));
1650 zCtrl = g.argv[3];
1651 for(j=0; j<ArraySize(aSetng); j++){
1652 if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){
1653 db_set_int(aSetng[j].zSetting, iCmd-3, 0);
1654 }
1655 }
1656 }
1657 if( iCmd==5 ){
1658 if( g.argc<4 ) usage("porter ON/OFF");
1659 db_set_int("search-stemmer", is_truth(g.argv[3]), 0);
1660 }
1661
1662
1663 /* destroy or rebuild the index, if requested */
1664 if( iAction>=1 ){
1665 search_drop_index();
1666 }
@@ -1646,14 +1671,16 @@
1671 /* Always show the status before ending */
1672 for(i=0; i<ArraySize(aSetng); i++){
1673 fossil_print("%-16s %s\n", aSetng[i].zName,
1674 db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
1675 }
1676 fossil_print("%-16s %s\n", "Porter stemmer:",
1677 db_get_boolean("search-stemmer",0) ? "on" : "off");
1678 if( search_index_exists() ){
1679 fossil_print("%-16s enabled\n", "full-text index:");
1680 fossil_print("%-16s %d\n", "documents:",
1681 db_int(0, "SELECT count(*) FROM ftsdocs"));
1682 }else{
1683 fossil_print("%-16s disabled\n", "full-text index:");
1684 }
1685 db_end_transaction(0);
1686 }
1687
--- src/setup.c
+++ src/setup.c
@@ -2251,16 +2251,18 @@
22512251
search_update_index(search_restrict(SRCH_ALL));
22522252
}
22532253
if( search_index_exists() ){
22542254
@ <p>Currently using an SQLite FTS4 search index. This makes search
22552255
@ run faster, especially on large repositories, but takes up space.</p>
2256
+ onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
22562257
@ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
22572258
@ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
22582259
}else{
22592260
@ <p>The SQLite FTS4 search index is disabled. All searching will be
22602261
@ a full-text scan. This usually works fine, but can be slow for
22612262
@ larger repositories.</p>
2263
+ onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
22622264
@ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
22632265
}
22642266
@ </div></form>
22652267
style_footer();
22662268
}
22672269
--- src/setup.c
+++ src/setup.c
@@ -2251,16 +2251,18 @@
2251 search_update_index(search_restrict(SRCH_ALL));
2252 }
2253 if( search_index_exists() ){
2254 @ <p>Currently using an SQLite FTS4 search index. This makes search
2255 @ run faster, especially on large repositories, but takes up space.</p>
 
2256 @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
2257 @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
2258 }else{
2259 @ <p>The SQLite FTS4 search index is disabled. All searching will be
2260 @ a full-text scan. This usually works fine, but can be slow for
2261 @ larger repositories.</p>
 
2262 @ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
2263 }
2264 @ </div></form>
2265 style_footer();
2266 }
2267
--- src/setup.c
+++ src/setup.c
@@ -2251,16 +2251,18 @@
2251 search_update_index(search_restrict(SRCH_ALL));
2252 }
2253 if( search_index_exists() ){
2254 @ <p>Currently using an SQLite FTS4 search index. This makes search
2255 @ run faster, especially on large repositories, but takes up space.</p>
2256 onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2257 @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
2258 @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
2259 }else{
2260 @ <p>The SQLite FTS4 search index is disabled. All searching will be
2261 @ a full-text scan. This usually works fine, but can be slow for
2262 @ larger repositories.</p>
2263 onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2264 @ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
2265 }
2266 @ </div></form>
2267 style_footer();
2268 }
2269

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button