Fossil SCM
Add FTS tokenizer selection to the /srchsetup page.
Commit
9e52f8c050bf6754f029abacffd9fbf5e97690d3ca27a7cea2ec06feb375efd6
Parent
bcff4d81bb617ac…
2 files changed:
src/search.c: +25 -10
src/setup.c: +19 -2
+25
-10
| --- src/search.c | ||
| +++ src/search.c | ||
| @@ -1555,44 +1555,50 @@ | ||
| 1555 | 1555 | #define FTS5TOK_NONE 0 /* no FTS stemmer */ |
| 1556 | 1556 | #define FTS5TOK_PORTER 1 /* porter stemmer */ |
| 1557 | 1557 | #define FTS5TOK_TRIGRAM 3 /* trigram stemmer */ |
| 1558 | 1558 | #endif |
| 1559 | 1559 | |
| 1560 | +/* | |
| 1561 | +** Cached FTS5TOK_xyz value for search_tokenizer_type() and | |
| 1562 | +** friends. | |
| 1563 | +*/ | |
| 1564 | +static int iFtsTokenizer = -1; | |
| 1565 | + | |
| 1560 | 1566 | /* |
| 1561 | 1567 | ** Returns one of the FTS5TOK_xyz values, depending on the value of |
| 1562 | 1568 | ** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The |
| 1563 | 1569 | ** result of the first call is cached for subsequent calls unless |
| 1564 | 1570 | ** bRecheck is true. |
| 1565 | 1571 | */ |
| 1566 | 1572 | int search_tokenizer_type(int bRecheck){ |
| 1567 | - static int iStemmer = -1; | |
| 1568 | 1573 | char *z; |
| 1569 | - if( iStemmer>=0 && bRecheck==0 ){ | |
| 1570 | - return iStemmer; | |
| 1574 | + if( iFtsTokenizer>=0 && bRecheck==0 ){ | |
| 1575 | + return iFtsTokenizer; | |
| 1571 | 1576 | } |
| 1572 | 1577 | z = db_get("search-tokenizer",0); |
| 1573 | 1578 | if( 0==z ){ |
| 1574 | - iStemmer = FTS5TOK_NONE; | |
| 1579 | + iFtsTokenizer = FTS5TOK_NONE; | |
| 1575 | 1580 | }else if(0==fossil_strcmp(z,"porter")){ |
| 1576 | - iStemmer = FTS5TOK_PORTER; | |
| 1581 | + iFtsTokenizer = FTS5TOK_PORTER; | |
| 1577 | 1582 | }else if(0==fossil_strcmp(z,"trigram")){ |
| 1578 | - iStemmer = FTS5TOK_TRIGRAM; | |
| 1583 | + iFtsTokenizer = FTS5TOK_TRIGRAM; | |
| 1579 | 1584 | }else{ |
| 1580 | - iStemmer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; | |
| 1585 | + iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; | |
| 1581 | 1586 | } |
| 1582 | 1587 | fossil_free(z); |
| 1583 | - return iStemmer; | |
| 1588 | + return iFtsTokenizer; | |
| 1584 | 1589 | } |
| 1585 | 1590 | |
| 1586 | 1591 | /* |
| 1587 | 1592 | ** Returns a string value suitable for use as the search-tokenizer |
| 1588 | 1593 | ** setting's value, depending on the value of z. If z is 0 then the |
| 1589 | 1594 | ** current search-tokenizer value is used as the basis for formulating |
| 1590 | 1595 | ** the result (which may differ from the current value but will have |
| 1591 | -** the same meaning). | |
| 1596 | +** the same meaning). Any unknown/unsupported value is interpreted as | |
| 1597 | +** "off". | |
| 1592 | 1598 | */ |
| 1593 | -static const char *search_tokenizer_for_string(const char *z){ | |
| 1599 | +const char *search_tokenizer_for_string(const char *z){ | |
| 1594 | 1600 | char * zTmp = 0; |
| 1595 | 1601 | const char *zRc = 0; |
| 1596 | 1602 | |
| 1597 | 1603 | if( 0==z ){ |
| 1598 | 1604 | z = zTmp = db_get("search-tokenizer",0); |
| @@ -1607,10 +1613,19 @@ | ||
| 1607 | 1613 | zRc = is_truth(z) ? "porter" : "off"; |
| 1608 | 1614 | } |
| 1609 | 1615 | fossil_free(zTmp); |
| 1610 | 1616 | return zRc; |
| 1611 | 1617 | } |
| 1618 | + | |
| 1619 | +/* | |
| 1620 | +** Sets the search-tokenizer config setting to the value of | |
| 1621 | +** search_tokenizer_for_string(zName). | |
| 1622 | +*/ | |
| 1623 | +void search_set_tokenizer(const char *zName){ | |
| 1624 | + db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0); | |
| 1625 | + iFtsTokenizer = -1; | |
| 1626 | +} | |
| 1612 | 1627 | |
| 1613 | 1628 | /* |
| 1614 | 1629 | ** Create or drop the tables associated with a full-text index. |
| 1615 | 1630 | */ |
| 1616 | 1631 | static int searchIdxExists = -1; |
| 1617 | 1632 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -1555,44 +1555,50 @@ | |
| 1555 | #define FTS5TOK_NONE 0 /* no FTS stemmer */ |
| 1556 | #define FTS5TOK_PORTER 1 /* porter stemmer */ |
| 1557 | #define FTS5TOK_TRIGRAM 3 /* trigram stemmer */ |
| 1558 | #endif |
| 1559 | |
| 1560 | /* |
| 1561 | ** Returns one of the FTS5TOK_xyz values, depending on the value of |
| 1562 | ** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The |
| 1563 | ** result of the first call is cached for subsequent calls unless |
| 1564 | ** bRecheck is true. |
| 1565 | */ |
| 1566 | int search_tokenizer_type(int bRecheck){ |
| 1567 | static int iStemmer = -1; |
| 1568 | char *z; |
| 1569 | if( iStemmer>=0 && bRecheck==0 ){ |
| 1570 | return iStemmer; |
| 1571 | } |
| 1572 | z = db_get("search-tokenizer",0); |
| 1573 | if( 0==z ){ |
| 1574 | iStemmer = FTS5TOK_NONE; |
| 1575 | }else if(0==fossil_strcmp(z,"porter")){ |
| 1576 | iStemmer = FTS5TOK_PORTER; |
| 1577 | }else if(0==fossil_strcmp(z,"trigram")){ |
| 1578 | iStemmer = FTS5TOK_TRIGRAM; |
| 1579 | }else{ |
| 1580 | iStemmer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; |
| 1581 | } |
| 1582 | fossil_free(z); |
| 1583 | return iStemmer; |
| 1584 | } |
| 1585 | |
| 1586 | /* |
| 1587 | ** Returns a string value suitable for use as the search-tokenizer |
| 1588 | ** setting's value, depending on the value of z. If z is 0 then the |
| 1589 | ** current search-tokenizer value is used as the basis for formulating |
| 1590 | ** the result (which may differ from the current value but will have |
| 1591 | ** the same meaning). |
| 1592 | */ |
| 1593 | static const char *search_tokenizer_for_string(const char *z){ |
| 1594 | char * zTmp = 0; |
| 1595 | const char *zRc = 0; |
| 1596 | |
| 1597 | if( 0==z ){ |
| 1598 | z = zTmp = db_get("search-tokenizer",0); |
| @@ -1607,10 +1613,19 @@ | |
| 1607 | zRc = is_truth(z) ? "porter" : "off"; |
| 1608 | } |
| 1609 | fossil_free(zTmp); |
| 1610 | return zRc; |
| 1611 | } |
| 1612 | |
| 1613 | /* |
| 1614 | ** Create or drop the tables associated with a full-text index. |
| 1615 | */ |
| 1616 | static int searchIdxExists = -1; |
| 1617 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -1555,44 +1555,50 @@ | |
| 1555 | #define FTS5TOK_NONE 0 /* no FTS stemmer */ |
| 1556 | #define FTS5TOK_PORTER 1 /* porter stemmer */ |
| 1557 | #define FTS5TOK_TRIGRAM 3 /* trigram stemmer */ |
| 1558 | #endif |
| 1559 | |
| 1560 | /* |
| 1561 | ** Cached FTS5TOK_xyz value for search_tokenizer_type() and |
| 1562 | ** friends. |
| 1563 | */ |
| 1564 | static int iFtsTokenizer = -1; |
| 1565 | |
| 1566 | /* |
| 1567 | ** Returns one of the FTS5TOK_xyz values, depending on the value of |
| 1568 | ** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The |
| 1569 | ** result of the first call is cached for subsequent calls unless |
| 1570 | ** bRecheck is true. |
| 1571 | */ |
| 1572 | int search_tokenizer_type(int bRecheck){ |
| 1573 | char *z; |
| 1574 | if( iFtsTokenizer>=0 && bRecheck==0 ){ |
| 1575 | return iFtsTokenizer; |
| 1576 | } |
| 1577 | z = db_get("search-tokenizer",0); |
| 1578 | if( 0==z ){ |
| 1579 | iFtsTokenizer = FTS5TOK_NONE; |
| 1580 | }else if(0==fossil_strcmp(z,"porter")){ |
| 1581 | iFtsTokenizer = FTS5TOK_PORTER; |
| 1582 | }else if(0==fossil_strcmp(z,"trigram")){ |
| 1583 | iFtsTokenizer = FTS5TOK_TRIGRAM; |
| 1584 | }else{ |
| 1585 | iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; |
| 1586 | } |
| 1587 | fossil_free(z); |
| 1588 | return iFtsTokenizer; |
| 1589 | } |
| 1590 | |
| 1591 | /* |
| 1592 | ** Returns a string value suitable for use as the search-tokenizer |
| 1593 | ** setting's value, depending on the value of z. If z is 0 then the |
| 1594 | ** current search-tokenizer value is used as the basis for formulating |
| 1595 | ** the result (which may differ from the current value but will have |
| 1596 | ** the same meaning). Any unknown/unsupported value is interpreted as |
| 1597 | ** "off". |
| 1598 | */ |
| 1599 | const char *search_tokenizer_for_string(const char *z){ |
| 1600 | char * zTmp = 0; |
| 1601 | const char *zRc = 0; |
| 1602 | |
| 1603 | if( 0==z ){ |
| 1604 | z = zTmp = db_get("search-tokenizer",0); |
| @@ -1607,10 +1613,19 @@ | |
| 1613 | zRc = is_truth(z) ? "porter" : "off"; |
| 1614 | } |
| 1615 | fossil_free(zTmp); |
| 1616 | return zRc; |
| 1617 | } |
| 1618 | |
| 1619 | /* |
| 1620 | ** Sets the search-tokenizer config setting to the value of |
| 1621 | ** search_tokenizer_for_string(zName). |
| 1622 | */ |
| 1623 | void search_set_tokenizer(const char *zName){ |
| 1624 | db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0); |
| 1625 | iFtsTokenizer = -1; |
| 1626 | } |
| 1627 | |
| 1628 | /* |
| 1629 | ** Create or drop the tables associated with a full-text index. |
| 1630 | */ |
| 1631 | static int searchIdxExists = -1; |
| 1632 |
+19
-2
| --- src/setup.c | ||
| +++ src/setup.c | ||
| @@ -2005,10 +2005,25 @@ | ||
| 2005 | 2005 | @ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p> |
| 2006 | 2006 | } |
| 2007 | 2007 | style_finish_page(); |
| 2008 | 2008 | } |
| 2009 | 2009 | |
| 2010 | + | |
| 2011 | +/* | |
| 2012 | +** Renders a selection list of values for the search-tokenizer | |
| 2013 | +** setting, using the form field name "ftstok". | |
| 2014 | +*/ | |
| 2015 | +static void select_fts_tokenizer(void){ | |
| 2016 | + const char *const aTokenizer[] = { | |
| 2017 | + "off", "None", | |
| 2018 | + "porter", "Porter Stemmer", | |
| 2019 | + "trigram", "Trigram" | |
| 2020 | + }; | |
| 2021 | + multiple_choice_attribute("FTS Tokenizer", "search-tokenizer", | |
| 2022 | + "ftstok", "off", 3, aTokenizer); | |
| 2023 | +} | |
| 2024 | + | |
| 2010 | 2025 | /* |
| 2011 | 2026 | ** WEBPAGE: srchsetup |
| 2012 | 2027 | ** |
| 2013 | 2028 | ** Configure the search engine. Requires Admin privilege. |
| 2014 | 2029 | */ |
| @@ -2063,28 +2078,30 @@ | ||
| 2063 | 2078 | @ <p><input type="submit" name="submit" value="Apply Changes" /></p> |
| 2064 | 2079 | @ <hr /> |
| 2065 | 2080 | if( P("fts0") ){ |
| 2066 | 2081 | search_drop_index(); |
| 2067 | 2082 | }else if( P("fts1") ){ |
| 2083 | + const char *zTokenizer = PD("ftstok","off"); | |
| 2084 | + search_set_tokenizer(zTokenizer); | |
| 2068 | 2085 | search_drop_index(); |
| 2069 | 2086 | search_create_index(); |
| 2070 | 2087 | search_fill_index(); |
| 2071 | 2088 | search_update_index(search_restrict(SRCH_ALL)); |
| 2072 | 2089 | } |
| 2073 | 2090 | if( search_index_exists() ){ |
| 2074 | 2091 | @ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index. |
| 2075 | 2092 | @ The index helps search run faster, especially on large repositories, |
| 2076 | 2093 | @ but takes up space.</p> |
| 2077 | - onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); | |
| 2094 | + select_fts_tokenizer(); | |
| 2078 | 2095 | @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index"> |
| 2079 | 2096 | @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index"> |
| 2080 | 2097 | style_submenu_element("FTS Index Debugging","%R/test-ftsdocs"); |
| 2081 | 2098 | }else{ |
| 2082 | 2099 | @ <p>The SQLite search index is disabled. All searching will be |
| 2083 | 2100 | @ a full-text scan. This usually works fine, but can be slow for |
| 2084 | 2101 | @ larger repositories.</p> |
| 2085 | - onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); | |
| 2102 | + select_fts_tokenizer(); | |
| 2086 | 2103 | @ <p><input type="submit" name="fts1" value="Create A Full-Text Index"> |
| 2087 | 2104 | } |
| 2088 | 2105 | @ </div></form> |
| 2089 | 2106 | style_finish_page(); |
| 2090 | 2107 | } |
| 2091 | 2108 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -2005,10 +2005,25 @@ | |
| 2005 | @ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p> |
| 2006 | } |
| 2007 | style_finish_page(); |
| 2008 | } |
| 2009 | |
| 2010 | /* |
| 2011 | ** WEBPAGE: srchsetup |
| 2012 | ** |
| 2013 | ** Configure the search engine. Requires Admin privilege. |
| 2014 | */ |
| @@ -2063,28 +2078,30 @@ | |
| 2063 | @ <p><input type="submit" name="submit" value="Apply Changes" /></p> |
| 2064 | @ <hr /> |
| 2065 | if( P("fts0") ){ |
| 2066 | search_drop_index(); |
| 2067 | }else if( P("fts1") ){ |
| 2068 | search_drop_index(); |
| 2069 | search_create_index(); |
| 2070 | search_fill_index(); |
| 2071 | search_update_index(search_restrict(SRCH_ALL)); |
| 2072 | } |
| 2073 | if( search_index_exists() ){ |
| 2074 | @ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index. |
| 2075 | @ The index helps search run faster, especially on large repositories, |
| 2076 | @ but takes up space.</p> |
| 2077 | onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); |
| 2078 | @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index"> |
| 2079 | @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index"> |
| 2080 | style_submenu_element("FTS Index Debugging","%R/test-ftsdocs"); |
| 2081 | }else{ |
| 2082 | @ <p>The SQLite search index is disabled. All searching will be |
| 2083 | @ a full-text scan. This usually works fine, but can be slow for |
| 2084 | @ larger repositories.</p> |
| 2085 | onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); |
| 2086 | @ <p><input type="submit" name="fts1" value="Create A Full-Text Index"> |
| 2087 | } |
| 2088 | @ </div></form> |
| 2089 | style_finish_page(); |
| 2090 | } |
| 2091 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -2005,10 +2005,25 @@ | |
| 2005 | @ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p> |
| 2006 | } |
| 2007 | style_finish_page(); |
| 2008 | } |
| 2009 | |
| 2010 | |
| 2011 | /* |
| 2012 | ** Renders a selection list of values for the search-tokenizer |
| 2013 | ** setting, using the form field name "ftstok". |
| 2014 | */ |
| 2015 | static void select_fts_tokenizer(void){ |
| 2016 | const char *const aTokenizer[] = { |
| 2017 | "off", "None", |
| 2018 | "porter", "Porter Stemmer", |
| 2019 | "trigram", "Trigram" |
| 2020 | }; |
| 2021 | multiple_choice_attribute("FTS Tokenizer", "search-tokenizer", |
| 2022 | "ftstok", "off", 3, aTokenizer); |
| 2023 | } |
| 2024 | |
| 2025 | /* |
| 2026 | ** WEBPAGE: srchsetup |
| 2027 | ** |
| 2028 | ** Configure the search engine. Requires Admin privilege. |
| 2029 | */ |
| @@ -2063,28 +2078,30 @@ | |
| 2078 | @ <p><input type="submit" name="submit" value="Apply Changes" /></p> |
| 2079 | @ <hr /> |
| 2080 | if( P("fts0") ){ |
| 2081 | search_drop_index(); |
| 2082 | }else if( P("fts1") ){ |
| 2083 | const char *zTokenizer = PD("ftstok","off"); |
| 2084 | search_set_tokenizer(zTokenizer); |
| 2085 | search_drop_index(); |
| 2086 | search_create_index(); |
| 2087 | search_fill_index(); |
| 2088 | search_update_index(search_restrict(SRCH_ALL)); |
| 2089 | } |
| 2090 | if( search_index_exists() ){ |
| 2091 | @ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index. |
| 2092 | @ The index helps search run faster, especially on large repositories, |
| 2093 | @ but takes up space.</p> |
| 2094 | select_fts_tokenizer(); |
| 2095 | @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index"> |
| 2096 | @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index"> |
| 2097 | style_submenu_element("FTS Index Debugging","%R/test-ftsdocs"); |
| 2098 | }else{ |
| 2099 | @ <p>The SQLite search index is disabled. All searching will be |
| 2100 | @ a full-text scan. This usually works fine, but can be slow for |
| 2101 | @ larger repositories.</p> |
| 2102 | select_fts_tokenizer(); |
| 2103 | @ <p><input type="submit" name="fts1" value="Create A Full-Text Index"> |
| 2104 | } |
| 2105 | @ </div></form> |
| 2106 | style_finish_page(); |
| 2107 | } |
| 2108 |