| | @@ -730,24 +730,32 @@ |
| 730 | 730 | int argc, |
| 731 | 731 | sqlite3_value **argv |
| 732 | 732 | ){ |
| 733 | 733 | const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); |
| 734 | 734 | int nVal = sqlite3_value_bytes(argv[0])/4; |
| 735 | + int nCol; /* Number of columns in the index */ |
| 735 | 736 | int nTerm; /* Number of search terms in the query */ |
| 736 | | - int i; /* Loop counter */ |
| 737 | + int i, j; /* Loop counter */ |
| 737 | 738 | double r = 1.0; /* Score */ |
| 739 | + const unsigned *aX, *aS; |
| 738 | 740 | |
| 739 | | - if( nVal<6 ) return; |
| 740 | | - if( aVal[1]!=1 ) return; |
| 741 | + if( nVal<2 ) return; |
| 741 | 742 | nTerm = aVal[0]; |
| 742 | | - r *= 1<<((30*(aVal[2]-1))/nTerm); |
| 743 | | - for(i=1; i<=nTerm; i++){ |
| 744 | | - int hits_this_row = aVal[3*i]; |
| 745 | | - int hits_all_rows = aVal[3*i+1]; |
| 746 | | - int rows_with_hit = aVal[3*i+2]; |
| 747 | | - double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit; |
| 748 | | - r *= hits_this_row/avg_hits_per_row; |
| 743 | + nCol = aVal[1]; |
| 744 | + if( nVal<2+3*nCol*nTerm+4*nCol ) return; |
| 745 | + aS = aVal+2; |
| 746 | + aX = aS+nCol; |
| 747 | + for(j=0; j<nCol; j++){ |
| 748 | + r *= 1<<((30*(aS[j]-1))/nTerm); |
| 749 | + for(i=0; i<nTerm; i++){ |
| 750 | + int hits_this_row = aX[j + i*nCol]; |
| 751 | + int hits_all_rows = aX[j + i*nCol + 1]; |
| 752 | + int rows_with_hit = aX[j + i*nCol + 2]; |
| 753 | + double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit; |
| 754 | + r *= hits_this_row/avg_hits_per_row; |
| 755 | + } |
| 756 | + r *= 2.0; |
| 749 | 757 | } |
| 750 | 758 | #define SEARCH_DEBUG_RANK 0 |
| 751 | 759 | #if SEARCH_DEBUG_RANK |
| 752 | 760 | { |
| 753 | 761 | Blob x; |
| | @@ -1294,20 +1302,21 @@ |
| 1294 | 1302 | @ name TEXT, -- Additional document description |
| 1295 | 1303 | @ idxed BOOLEAN, -- True if currently in the index |
| 1296 | 1304 | @ label TEXT, -- Label to print on search results |
| 1297 | 1305 | @ url TEXT, -- URL to access this document |
| 1298 | 1306 | @ mtime DATE, -- Date when document created |
| 1307 | +@ bx TEXT, -- Temporary "body" content cache |
| 1299 | 1308 | @ UNIQUE(type,rid) |
| 1300 | 1309 | @ ); |
| 1301 | 1310 | @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; |
| 1302 | 1311 | @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w'; |
| 1303 | 1312 | @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS |
| 1304 | 1313 | @ SELECT rowid, type, rid, name, idxed, label, url, mtime, |
| 1305 | | -@ stext(type,rid,name) AS 'stext' |
| 1314 | +@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body' |
| 1306 | 1315 | @ FROM ftsdocs; |
| 1307 | 1316 | @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx |
| 1308 | | -@ USING fts4(content="ftscontent", stext); |
| 1317 | +@ USING fts4(content="ftscontent", title, body%s); |
| 1309 | 1318 | ; |
| 1310 | 1319 | static const char zFtsDrop[] = |
| 1311 | 1320 | @ DROP TABLE IF EXISTS "%w".ftsidx; |
| 1312 | 1321 | @ DROP VIEW IF EXISTS "%w".ftscontent; |
| 1313 | 1322 | @ DROP TABLE IF EXISTS "%w".ftsdocs; |
| | @@ -1317,13 +1326,15 @@ |
| 1317 | 1326 | ** Create or drop the tables associated with a full-text index. |
| 1318 | 1327 | */ |
| 1319 | 1328 | static int searchIdxExists = -1; |
| 1320 | 1329 | void search_create_index(void){ |
| 1321 | 1330 | const char *zDb = db_name("repository"); |
| 1331 | + int useStemmer = db_get_boolean("search-stemmer",0); |
| 1332 | + const char *zExtra = useStemmer ? ",tokenize=porter" : ""; |
| 1322 | 1333 | search_sql_setup(g.db); |
| 1323 | | - db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w"*/, |
| 1324 | | - zDb, zDb, zDb, zDb, zDb); |
| 1334 | + db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/, |
| 1335 | + zDb, zDb, zDb, zDb, zDb, zExtra/*safe-for-%s*/); |
| 1325 | 1336 | searchIdxExists = 1; |
| 1326 | 1337 | } |
| 1327 | 1338 | void search_drop_index(void){ |
| 1328 | 1339 | const char *zDb = db_name("repository"); |
| 1329 | 1340 | db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zDb, zDb, zDb); |
| | @@ -1441,34 +1452,38 @@ |
| 1441 | 1452 | db_multi_exec( |
| 1442 | 1453 | "DELETE FROM ftsdocs WHERE type='d'" |
| 1443 | 1454 | " AND rid NOT IN (SELECT rid FROM current_docs)" |
| 1444 | 1455 | ); |
| 1445 | 1456 | db_multi_exec( |
| 1446 | | - "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,url,mtime)" |
| 1457 | + "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)" |
| 1447 | 1458 | " SELECT 'd', rid, name, 0," |
| 1448 | | - " printf('Document: %%s',name)," |
| 1459 | + " 'Document: '||title('d',rid,name)," |
| 1460 | + " body('d',rid,name)," |
| 1449 | 1461 | " printf('/doc/%q/%%s',urlencode(name))," |
| 1450 | 1462 | " %.17g" |
| 1451 | 1463 | " FROM current_docs", |
| 1452 | 1464 | zBrUuid, rTime |
| 1453 | 1465 | ); |
| 1454 | 1466 | db_multi_exec( |
| 1455 | | - "INSERT INTO ftsidx(docid,stext)" |
| 1456 | | - " SELECT rowid, stext FROM ftscontent WHERE type='d' AND NOT idxed" |
| 1467 | + "INSERT INTO ftsidx(docid,title,body)" |
| 1468 | + " SELECT rowid, name, bx FROM ftsdocs WHERE type='d' AND NOT idxed" |
| 1457 | 1469 | ); |
| 1458 | 1470 | db_multi_exec( |
| 1459 | | - "UPDATE ftsdocs SET idxed=1 WHERE type='d' AND NOT idxed" |
| 1471 | + "UPDATE ftsdocs SET" |
| 1472 | + " idxed=1," |
| 1473 | + " bx=NULL" |
| 1474 | + " WHERE type='d' AND NOT idxed" |
| 1460 | 1475 | ); |
| 1461 | 1476 | } |
| 1462 | 1477 | |
| 1463 | 1478 | /* |
| 1464 | 1479 | ** Deal with all of the unindexed 'c' terms in FTSDOCS |
| 1465 | 1480 | */ |
| 1466 | 1481 | static void search_update_checkin_index(void){ |
| 1467 | 1482 | db_multi_exec( |
| 1468 | | - "INSERT INTO ftsidx(docid,stext)" |
| 1469 | | - " SELECT rowid, stext('c',rid,NULL) FROM ftsdocs" |
| 1483 | + "INSERT INTO ftsidx(docid,title,body)" |
| 1484 | + " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs" |
| 1470 | 1485 | " WHERE type='c' AND NOT idxed;" |
| 1471 | 1486 | ); |
| 1472 | 1487 | db_multi_exec( |
| 1473 | 1488 | "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" |
| 1474 | 1489 | " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL," |
| | @@ -1485,19 +1500,20 @@ |
| 1485 | 1500 | /* |
| 1486 | 1501 | ** Deal with all of the unindexed 't' terms in FTSDOCS |
| 1487 | 1502 | */ |
| 1488 | 1503 | static void search_update_ticket_index(void){ |
| 1489 | 1504 | db_multi_exec( |
| 1490 | | - "INSERT INTO ftsidx(docid,stext)" |
| 1491 | | - " SELECT rowid, stext('t',rid,NULL) FROM ftsdocs" |
| 1505 | + "INSERT INTO ftsidx(docid,title,body)" |
| 1506 | + " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs" |
| 1492 | 1507 | " WHERE type='t' AND NOT idxed;" |
| 1493 | 1508 | ); |
| 1494 | 1509 | if( db_changes()==0 ) return; |
| 1495 | 1510 | db_multi_exec( |
| 1496 | 1511 | "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" |
| 1497 | 1512 | " SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL," |
| 1498 | | - " printf('Ticket [%%.16s] on %%s',tkt_uuid,datetime(tkt_mtime))," |
| 1513 | + " printf('Ticket: %%s (%%s)',title('t',tkt_id,null)," |
| 1514 | + " datetime(tkt_mtime))," |
| 1499 | 1515 | " printf('/tktview/%%.20s',tkt_uuid)," |
| 1500 | 1516 | " tkt_mtime" |
| 1501 | 1517 | " FROM ftsdocs, ticket" |
| 1502 | 1518 | " WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed" |
| 1503 | 1519 | " AND ticket.tkt_id=ftsdocs.rid" |
| | @@ -1507,12 +1523,12 @@ |
| 1507 | 1523 | /* |
| 1508 | 1524 | ** Deal with all of the unindexed 'w' terms in FTSDOCS |
| 1509 | 1525 | */ |
| 1510 | 1526 | static void search_update_wiki_index(void){ |
| 1511 | 1527 | db_multi_exec( |
| 1512 | | - "INSERT INTO ftsidx(docid,stext)" |
| 1513 | | - " SELECT rowid, stext('w',rid,NULL) FROM ftsdocs" |
| 1528 | + "INSERT INTO ftsidx(docid,title,body)" |
| 1529 | + " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs" |
| 1514 | 1530 | " WHERE type='w' AND NOT idxed;" |
| 1515 | 1531 | ); |
| 1516 | 1532 | if( db_changes()==0 ) return; |
| 1517 | 1533 | db_multi_exec( |
| 1518 | 1534 | "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)" |
| | @@ -1565,19 +1581,22 @@ |
| 1565 | 1581 | ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? |
| 1566 | 1582 | ** |
| 1567 | 1583 | ** The "fossil fts-config" command configures the full-text search capabilities |
| 1568 | 1584 | ** of the repository. Subcommands: |
| 1569 | 1585 | ** |
| 1570 | | -** reindex Rebuild the search index. Create it if it does |
| 1571 | | -** not already exist |
| 1586 | +** reindex Rebuild the search index. This is a no-op if |
| 1587 | +** index search is disabled |
| 1572 | 1588 | ** |
| 1573 | 1589 | ** index (on|off) Turn the search index on or off |
| 1574 | 1590 | ** |
| 1575 | 1591 | ** enable cdtw Enable various kinds of search. c=Check-ins, |
| 1576 | 1592 | ** d=Documents, t=Tickets, w=Wiki. |
| 1577 | 1593 | ** |
| 1577 | 1593 | ** disable cdtw Disable various kinds of search |
| 1595 | +** |
| 1596 | +** stemmer (on|off) Turn the Porter stemmer on or off for indexed |
| 1597 | +** search. (Unindexed search is never stemmed.) |
| 1579 | 1598 | ** |
| 1580 | 1599 | ** The current search settings are displayed after any changes are applied. |
| 1581 | 1600 | ** Run this command with no arguments to simply see the settings. |
| 1582 | 1601 | */ |
| 1583 | 1602 | void test_fts_cmd(void){ |
| | @@ -1584,16 +1603,17 @@ |
| 1584 | 1603 | static const struct { int iCmd; const char *z; } aCmd[] = { |
| 1585 | 1604 | { 1, "reindex" }, |
| 1586 | 1605 | { 2, "index" }, |
| 1587 | 1606 | { 3, "disable" }, |
| 1588 | 1607 | { 4, "enable" }, |
| 1608 | + { 5, "stemmer" }, |
| 1589 | 1609 | }; |
| 1590 | 1610 | static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = { |
| 1591 | | - { "search-ckin", "check-in search:", "c" }, |
| 1592 | | - { "search-doc", "document search:", "d" }, |
| 1593 | | - { "search-tkt", "ticket search:", "t" }, |
| 1594 | | - { "search-wiki", "wiki search:", "w" }, |
| 1611 | + { "search-ckin", "check-in search:", "c" }, |
| 1612 | + { "search-doc", "document search:", "d" }, |
| 1613 | + { "search-tkt", "ticket search:", "t" }, |
| 1614 | + { "search-wiki", "wiki search:", "w" }, |
| 1595 | 1615 | }; |
| 1596 | 1616 | char *zSubCmd; |
| 1597 | 1617 | int i, j, n; |
| 1598 | 1618 | int iCmd = 0; |
| 1599 | 1619 | int iAction = 0; |
| | @@ -1613,11 +1633,11 @@ |
| 1613 | 1633 | return; |
| 1614 | 1634 | } |
| 1615 | 1635 | iCmd = aCmd[i].iCmd; |
| 1616 | 1636 | } |
| 1617 | 1637 | if( iCmd==1 ){ |
| 1618 | | - iAction = 2; |
| 1638 | + if( search_index_exists() ) iAction = 2; |
| 1619 | 1639 | } |
| 1620 | 1640 | if( iCmd==2 ){ |
| 1621 | 1641 | if( g.argc<3 ) usage("index (on|off)"); |
| 1622 | 1642 | iAction = 1 + is_truth(g.argv[3]); |
| 1623 | 1643 | } |
| | @@ -1624,18 +1644,23 @@ |
| 1624 | 1644 | db_begin_transaction(); |
| 1625 | 1645 | |
| 1626 | 1646 | /* Adjust search settings */ |
| 1627 | 1647 | if( iCmd==3 || iCmd==4 ){ |
| 1628 | 1648 | const char *zCtrl; |
| 1629 | | - if( g.argc<4 ) usage("enable STRING"); |
| 1649 | + if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd)); |
| 1630 | 1650 | zCtrl = g.argv[3]; |
| 1631 | 1651 | for(j=0; j<ArraySize(aSetng); j++){ |
| 1632 | 1652 | if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){ |
| 1633 | 1653 | db_set_int(aSetng[j].zSetting, iCmd-3, 0); |
| 1634 | 1654 | } |
| 1635 | 1655 | } |
| 1636 | 1656 | } |
| 1657 | + if( iCmd==5 ){ |
| 1658 | + if( g.argc<4 ) usage("porter ON/OFF"); |
| 1659 | + db_set_int("search-stemmer", is_truth(g.argv[3]), 0); |
| 1660 | + } |
| 1661 | + |
| 1637 | 1662 | |
| 1638 | 1663 | /* destroy or rebuild the index, if requested */ |
| 1639 | 1664 | if( iAction>=1 ){ |
| 1640 | 1665 | search_drop_index(); |
| 1641 | 1666 | } |
| | @@ -1646,14 +1671,16 @@ |
| 1646 | 1671 | /* Always show the status before ending */ |
| 1647 | 1672 | for(i=0; i<ArraySize(aSetng); i++){ |
| 1648 | 1673 | fossil_print("%-16s %s\n", aSetng[i].zName, |
| 1649 | 1674 | db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off"); |
| 1650 | 1675 | } |
| 1676 | + fossil_print("%-16s %s\n", "Porter stemmer:", |
| 1677 | + db_get_boolean("search-stemmer",0) ? "on" : "off"); |
| 1651 | 1678 | if( search_index_exists() ){ |
| 1652 | 1679 | fossil_print("%-16s enabled\n", "full-text index:"); |
| 1653 | 1680 | fossil_print("%-16s %d\n", "documents:", |
| 1654 | 1681 | db_int(0, "SELECT count(*) FROM ftsdocs")); |
| 1655 | 1682 | }else{ |
| 1656 | 1683 | fossil_print("%-16s disabled\n", "full-text index:"); |
| 1657 | 1684 | } |
| 1658 | 1685 | db_end_transaction(0); |
| 1659 | 1686 | } |
| 1660 | 1687 | |