Fossil SCM

Add FTS tokenizer selection to the /srchsetup page.

stephan 2023-04-18 17:45 fts5-trigram
Commit 9e52f8c050bf6754f029abacffd9fbf5e97690d3ca27a7cea2ec06feb375efd6
2 files changed: src/search.c (+25 -10), src/setup.c (+19 -2)
+25 -10
--- src/search.c
+++ src/search.c
@@ -1555,44 +1555,50 @@
15551555
#define FTS5TOK_NONE 0 /* no FTS stemmer */
15561556
#define FTS5TOK_PORTER 1 /* porter stemmer */
15571557
#define FTS5TOK_TRIGRAM 3 /* trigram stemmer */
15581558
#endif
15591559
1560
+/*
1561
+** Cached FTS5TOK_xyz value for search_tokenizer_type() and
1562
+** friends.
1563
+*/
1564
+static int iFtsTokenizer = -1;
1565
+
15601566
/*
15611567
** Returns one of the FTS5TOK_xyz values, depending on the value of
15621568
** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The
15631569
** result of the first call is cached for subsequent calls unless
15641570
** bRecheck is true.
15651571
*/
15661572
int search_tokenizer_type(int bRecheck){
1567
- static int iStemmer = -1;
15681573
char *z;
1569
- if( iStemmer>=0 && bRecheck==0 ){
1570
- return iStemmer;
1574
+ if( iFtsTokenizer>=0 && bRecheck==0 ){
1575
+ return iFtsTokenizer;
15711576
}
15721577
z = db_get("search-tokenizer",0);
15731578
if( 0==z ){
1574
- iStemmer = FTS5TOK_NONE;
1579
+ iFtsTokenizer = FTS5TOK_NONE;
15751580
}else if(0==fossil_strcmp(z,"porter")){
1576
- iStemmer = FTS5TOK_PORTER;
1581
+ iFtsTokenizer = FTS5TOK_PORTER;
15771582
}else if(0==fossil_strcmp(z,"trigram")){
1578
- iStemmer = FTS5TOK_TRIGRAM;
1583
+ iFtsTokenizer = FTS5TOK_TRIGRAM;
15791584
}else{
1580
- iStemmer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
1585
+ iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
15811586
}
15821587
fossil_free(z);
1583
- return iStemmer;
1588
+ return iFtsTokenizer;
15841589
}
15851590
15861591
/*
15871592
** Returns a string value suitable for use as the search-tokenizer
15881593
** setting's value, depending on the value of z. If z is 0 then the
15891594
** current search-tokenizer value is used as the basis for formulating
15901595
** the result (which may differ from the current value but will have
1591
-** the same meaning).
1596
+** the same meaning). Any unknown/unsupported value is interpreted as
1597
+** "off".
15921598
*/
1593
-static const char *search_tokenizer_for_string(const char *z){
1599
+const char *search_tokenizer_for_string(const char *z){
15941600
char * zTmp = 0;
15951601
const char *zRc = 0;
15961602
15971603
if( 0==z ){
15981604
z = zTmp = db_get("search-tokenizer",0);
@@ -1607,10 +1613,19 @@
16071613
zRc = is_truth(z) ? "porter" : "off";
16081614
}
16091615
fossil_free(zTmp);
16101616
return zRc;
16111617
}
1618
+
1619
+/*
1620
+** Sets the search-tokenizer config setting to the value of
1621
+** search_tokenizer_for_string(zName).
1622
+*/
1623
+void search_set_tokenizer(const char *zName){
1624
+ db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0);
1625
+ iFtsTokenizer = -1;
1626
+}
16121627
16131628
/*
16141629
** Create or drop the tables associated with a full-text index.
16151630
*/
16161631
static int searchIdxExists = -1;
16171632
--- src/search.c
+++ src/search.c
@@ -1555,44 +1555,50 @@
1555 #define FTS5TOK_NONE 0 /* no FTS stemmer */
1556 #define FTS5TOK_PORTER 1 /* porter stemmer */
1557 #define FTS5TOK_TRIGRAM 3 /* trigram stemmer */
1558 #endif
1559
 
 
 
 
 
 
1560 /*
1561 ** Returns one of the FTS5TOK_xyz values, depending on the value of
1562 ** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The
1563 ** result of the first call is cached for subsequent calls unless
1564 ** bRecheck is true.
1565 */
1566 int search_tokenizer_type(int bRecheck){
1567 static int iStemmer = -1;
1568 char *z;
1569 if( iStemmer>=0 && bRecheck==0 ){
1570 return iStemmer;
1571 }
1572 z = db_get("search-tokenizer",0);
1573 if( 0==z ){
1574 iStemmer = FTS5TOK_NONE;
1575 }else if(0==fossil_strcmp(z,"porter")){
1576 iStemmer = FTS5TOK_PORTER;
1577 }else if(0==fossil_strcmp(z,"trigram")){
1578 iStemmer = FTS5TOK_TRIGRAM;
1579 }else{
1580 iStemmer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
1581 }
1582 fossil_free(z);
1583 return iStemmer;
1584 }
1585
1586 /*
1587 ** Returns a string value suitable for use as the search-tokenizer
1588 ** setting's value, depending on the value of z. If z is 0 then the
1589 ** current search-tokenizer value is used as the basis for formulating
1590 ** the result (which may differ from the current value but will have
1591 ** the same meaning).
 
1592 */
1593 static const char *search_tokenizer_for_string(const char *z){
1594 char * zTmp = 0;
1595 const char *zRc = 0;
1596
1597 if( 0==z ){
1598 z = zTmp = db_get("search-tokenizer",0);
@@ -1607,10 +1613,19 @@
1607 zRc = is_truth(z) ? "porter" : "off";
1608 }
1609 fossil_free(zTmp);
1610 return zRc;
1611 }
 
 
 
 
 
 
 
 
 
1612
1613 /*
1614 ** Create or drop the tables associated with a full-text index.
1615 */
1616 static int searchIdxExists = -1;
1617
--- src/search.c
+++ src/search.c
@@ -1555,44 +1555,50 @@
1555 #define FTS5TOK_NONE 0 /* no FTS stemmer */
1556 #define FTS5TOK_PORTER 1 /* porter stemmer */
1557 #define FTS5TOK_TRIGRAM 3 /* trigram stemmer */
1558 #endif
1559
1560 /*
1561 ** Cached FTS5TOK_xyz value for search_tokenizer_type() and
1562 ** friends.
1563 */
1564 static int iFtsTokenizer = -1;
1565
1566 /*
1567 ** Returns one of the FTS5TOK_xyz values, depending on the value of
1568 ** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The
1569 ** result of the first call is cached for subsequent calls unless
1570 ** bRecheck is true.
1571 */
1572 int search_tokenizer_type(int bRecheck){
 
1573 char *z;
1574 if( iFtsTokenizer>=0 && bRecheck==0 ){
1575 return iFtsTokenizer;
1576 }
1577 z = db_get("search-tokenizer",0);
1578 if( 0==z ){
1579 iFtsTokenizer = FTS5TOK_NONE;
1580 }else if(0==fossil_strcmp(z,"porter")){
1581 iFtsTokenizer = FTS5TOK_PORTER;
1582 }else if(0==fossil_strcmp(z,"trigram")){
1583 iFtsTokenizer = FTS5TOK_TRIGRAM;
1584 }else{
1585 iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
1586 }
1587 fossil_free(z);
1588 return iFtsTokenizer;
1589 }
1590
1591 /*
1592 ** Returns a string value suitable for use as the search-tokenizer
1593 ** setting's value, depending on the value of z. If z is 0 then the
1594 ** current search-tokenizer value is used as the basis for formulating
1595 ** the result (which may differ from the current value but will have
1596 ** the same meaning). Any unknown/unsupported value is interpreted as
1597 ** "off".
1598 */
1599 const char *search_tokenizer_for_string(const char *z){
1600 char * zTmp = 0;
1601 const char *zRc = 0;
1602
1603 if( 0==z ){
1604 z = zTmp = db_get("search-tokenizer",0);
@@ -1607,10 +1613,19 @@
1613 zRc = is_truth(z) ? "porter" : "off";
1614 }
1615 fossil_free(zTmp);
1616 return zRc;
1617 }
1618
1619 /*
1620 ** Sets the search-tokenizer config setting to the value of
1621 ** search_tokenizer_for_string(zName).
1622 */
1623 void search_set_tokenizer(const char *zName){
1624 db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0);
1625 iFtsTokenizer = -1;
1626 }
1627
1628 /*
1629 ** Create or drop the tables associated with a full-text index.
1630 */
1631 static int searchIdxExists = -1;
1632
+19 -2
--- src/setup.c
+++ src/setup.c
@@ -2005,10 +2005,25 @@
20052005
@ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p>
20062006
}
20072007
style_finish_page();
20082008
}
20092009
2010
+
2011
+/*
2012
+** Renders a selection list of values for the search-tokenizer
2013
+** setting, using the form field name "ftstok".
2014
+*/
2015
+static void select_fts_tokenizer(void){
2016
+ const char *const aTokenizer[] = {
2017
+ "off", "None",
2018
+ "porter", "Porter Stemmer",
2019
+ "trigram", "Trigram"
2020
+ };
2021
+ multiple_choice_attribute("FTS Tokenizer", "search-tokenizer",
2022
+ "ftstok", "off", 3, aTokenizer);
2023
+}
2024
+
20102025
/*
20112026
** WEBPAGE: srchsetup
20122027
**
20132028
** Configure the search engine. Requires Admin privilege.
20142029
*/
@@ -2063,28 +2078,30 @@
20632078
@ <p><input type="submit" name="submit" value="Apply Changes" /></p>
20642079
@ <hr />
20652080
if( P("fts0") ){
20662081
search_drop_index();
20672082
}else if( P("fts1") ){
2083
+ const char *zTokenizer = PD("ftstok","off");
2084
+ search_set_tokenizer(zTokenizer);
20682085
search_drop_index();
20692086
search_create_index();
20702087
search_fill_index();
20712088
search_update_index(search_restrict(SRCH_ALL));
20722089
}
20732090
if( search_index_exists() ){
20742091
@ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index.
20752092
@ The index helps search run faster, especially on large repositories,
20762093
@ but takes up space.</p>
2077
- onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2094
+ select_fts_tokenizer();
20782095
@ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
20792096
@ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
20802097
style_submenu_element("FTS Index Debugging","%R/test-ftsdocs");
20812098
}else{
20822099
@ <p>The SQLite search index is disabled. All searching will be
20832100
@ a full-text scan. This usually works fine, but can be slow for
20842101
@ larger repositories.</p>
2085
- onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2102
+ select_fts_tokenizer();
20862103
@ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
20872104
}
20882105
@ </div></form>
20892106
style_finish_page();
20902107
}
20912108
--- src/setup.c
+++ src/setup.c
@@ -2005,10 +2005,25 @@
2005 @ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p>
2006 }
2007 style_finish_page();
2008 }
2009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2010 /*
2011 ** WEBPAGE: srchsetup
2012 **
2013 ** Configure the search engine. Requires Admin privilege.
2014 */
@@ -2063,28 +2078,30 @@
2063 @ <p><input type="submit" name="submit" value="Apply Changes" /></p>
2064 @ <hr />
2065 if( P("fts0") ){
2066 search_drop_index();
2067 }else if( P("fts1") ){
 
 
2068 search_drop_index();
2069 search_create_index();
2070 search_fill_index();
2071 search_update_index(search_restrict(SRCH_ALL));
2072 }
2073 if( search_index_exists() ){
2074 @ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index.
2075 @ The index helps search run faster, especially on large repositories,
2076 @ but takes up space.</p>
2077 onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2078 @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
2079 @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
2080 style_submenu_element("FTS Index Debugging","%R/test-ftsdocs");
2081 }else{
2082 @ <p>The SQLite search index is disabled. All searching will be
2083 @ a full-text scan. This usually works fine, but can be slow for
2084 @ larger repositories.</p>
2085 onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
2086 @ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
2087 }
2088 @ </div></form>
2089 style_finish_page();
2090 }
2091
--- src/setup.c
+++ src/setup.c
@@ -2005,10 +2005,25 @@
2005 @ <p><a href="admin_log?n=%d(limit)&x=%d(limit+ofst)">[Older]</a></p>
2006 }
2007 style_finish_page();
2008 }
2009
2010
2011 /*
2012 ** Renders a selection list of values for the search-tokenizer
2013 ** setting, using the form field name "ftstok".
2014 */
2015 static void select_fts_tokenizer(void){
2016 const char *const aTokenizer[] = {
2017 "off", "None",
2018 "porter", "Porter Stemmer",
2019 "trigram", "Trigram"
2020 };
2021 multiple_choice_attribute("FTS Tokenizer", "search-tokenizer",
2022 "ftstok", "off", 3, aTokenizer);
2023 }
2024
2025 /*
2026 ** WEBPAGE: srchsetup
2027 **
2028 ** Configure the search engine. Requires Admin privilege.
2029 */
@@ -2063,28 +2078,30 @@
2078 @ <p><input type="submit" name="submit" value="Apply Changes" /></p>
2079 @ <hr />
2080 if( P("fts0") ){
2081 search_drop_index();
2082 }else if( P("fts1") ){
2083 const char *zTokenizer = PD("ftstok","off");
2084 search_set_tokenizer(zTokenizer);
2085 search_drop_index();
2086 search_create_index();
2087 search_fill_index();
2088 search_update_index(search_restrict(SRCH_ALL));
2089 }
2090 if( search_index_exists() ){
2091 @ <p>Currently using an SQLite FTS%d(search_index_type(0)) search index.
2092 @ The index helps search run faster, especially on large repositories,
2093 @ but takes up space.</p>
2094 select_fts_tokenizer();
2095 @ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
2096 @ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
2097 style_submenu_element("FTS Index Debugging","%R/test-ftsdocs");
2098 }else{
2099 @ <p>The SQLite search index is disabled. All searching will be
2100 @ a full-text scan. This usually works fine, but can be slow for
2101 @ larger repositories.</p>
2102 select_fts_tokenizer();
2103 @ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
2104 }
2105 @ </div></form>
2106 style_finish_page();
2107 }
2108

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button