Fossil SCM

Add the /test-ftsdocs page, accessible only by administrators. Improved comments in the search logic.

drh 2016-08-30 13:35 trunk
Commit 375bc71ea6d10cfb3af23f76a66d9c06a6f9792e
1 file changed +188 -36
+188 -36
--- src/search.c
+++ src/search.c
@@ -13,29 +13,38 @@
1313
** [email protected]
1414
** http://www.hwaci.com/drh/
1515
**
1616
*******************************************************************************
1717
**
18
-** This file contains code to implement a very simple search function
18
+** This file contains code to implement search functions
1919
** against timeline comments, check-in content, wiki pages, and/or tickets.
2020
**
21
-** The search is full-text like in that it is looking for words and ignores
22
-** punctuation and capitalization. But it is more akin to "grep" in that
23
-** it scans the entire corpus for the search, and it does not support the
24
-** full functionality of FTS4.
21
+** The search can be either a per-query "grep"-like search that scans
22
+** the entire corpus. Or it can use the FTS4 or FTS5 search engine of
23
+** SQLite. The choice is an administrator configuration option.
24
+**
25
+** The first option is referred to as "full-scan search". The second
26
+** option is called "indexed search".
27
+**
28
+** The code in this file is ordered approximately as follows:
29
+**
30
+** (1) The full-scan search engine
31
+** (2) The indexed search engine
32
+** (3) Higher level interfaces that use either (1) or (2) according
33
+** to the current search configuration settings
2534
*/
2635
#include "config.h"
2736
#include "search.h"
2837
#include <assert.h>
2938
3039
#if INTERFACE
3140
32
-/* Maximum number of search terms */
41
+/* Maximum number of search terms for full-scan search */
3342
#define SEARCH_MAX_TERM 8
3443
3544
/*
36
-** A compiled search pattern
45
+** A compiled search pattern used for full-scan search.
3746
*/
3847
struct Search {
3948
int nTerm; /* Number of search terms */
4049
struct srchTerm { /* For each search term */
4150
char *z; /* Text */
@@ -85,11 +94,11 @@
8594
};
8695
#define ISALNUM(x) (!isBoundary[(x)&0xff])
8796
8897
8998
/*
90
-** Destroy a search context.
99
+** Destroy a full-scan search context.
91100
*/
92101
void search_end(Search *p){
93102
if( p ){
94103
fossil_free(p->zPattern);
95104
fossil_free(p->zMarkBegin);
@@ -100,11 +109,11 @@
100109
if( p!=&gSearch ) fossil_free(p);
101110
}
102111
}
103112
104113
/*
105
-** Compile a search pattern
114
+** Compile a full-scan search pattern
106115
*/
107116
static Search *search_init(
108117
const char *zPattern, /* The search pattern */
109118
const char *zMarkBegin, /* Start of a match */
110119
const char *zMarkEnd, /* End of a match */
@@ -157,11 +166,12 @@
157166
blob_append(pSnip, zTxt, n);
158167
}
159168
}
160169
}
161170
162
-/*
171
+/* This is the core search engine for full-scan search.
172
+**
163173
** Compare a search pattern against one or more input strings which
164174
** collectively comprise a document. Return a match score. Any
165175
** positive value means there was a match. Zero means that one or
166176
** more terms are missing.
167177
**
@@ -318,10 +328,13 @@
318328
319329
/*
320330
** COMMAND: test-match
321331
**
322332
** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ...
333
+**
334
+** Run the full-scan search algorithm using SEARCHSTRING against
335
+** the text of the files listed. Output matches and snippets.
323336
*/
324337
void test_match_cmd(void){
325338
Search *p;
326339
int i;
327340
Blob x;
@@ -351,15 +364,19 @@
351364
}
352365
search_end(p);
353366
}
354367
355368
/*
356
-** An SQL function to initialize the global search pattern:
369
+** An SQL function to initialize the full-scan search pattern:
357370
**
358371
** search_init(PATTERN,BEGIN,END,GAP,FLAGS)
359372
**
360
-** All arguments are optional.
373
+** All arguments are optional. PATTERN is the search pattern. If it
374
+** is omitted, then the global search pattern is reset. BEGIN and END
375
+** and GAP are the strings used to construct snippets. FLAGS is an
376
+** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC,
377
+** SRCH_TKT, or SRCH_ALL bits to determine what is to be searched.
361378
*/
362379
static void search_init_sqlfunc(
363380
sqlite3_context *context,
364381
int argc,
365382
sqlite3_value **argv
@@ -386,13 +403,15 @@
386403
}else{
387404
search_end(&gSearch);
388405
}
389406
}
390407
391
-/*
392
-** Try to match the input text against the search parameters set up
393
-** by the previous search_init() call. Remember the results globally.
408
+/* search_match(TEXT, TEXT, ....)
409
+**
410
+** Using the full-scan search engine created by the most recent call
411
+** to search_init(), match the input TEXT arguments.
412
+** Remember the results in the global full-scan search object.
394413
** Return non-zero on a match and zero on a miss.
395414
*/
396415
static void search_match_sqlfunc(
397416
sqlite3_context *context,
398417
int argc,
@@ -407,21 +426,27 @@
407426
}
408427
rc = search_match(&gSearch, nDoc, azDoc);
409428
sqlite3_result_int(context, rc);
410429
}
411430
412
-/*
413
-** These SQL functions return the results of the last
414
-** call to the search_match() SQL function.
431
+
432
+/* search_score()
433
+**
434
+** Return the match score for the last successful search_match call.
415435
*/
416436
static void search_score_sqlfunc(
417437
sqlite3_context *context,
418438
int argc,
419439
sqlite3_value **argv
420440
){
421441
sqlite3_result_int(context, gSearch.iScore);
422442
}
443
+
444
+/* search_snippet()
445
+**
446
+** Return a snippet for the last successful search_match() call.
447
+*/
423448
static void search_snippet_sqlfunc(
424449
sqlite3_context *context,
425450
int argc,
426451
sqlite3_value **argv
427452
){
@@ -429,11 +454,12 @@
429454
sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free);
430455
blob_init(&gSearch.snip, 0, 0);
431456
}
432457
}
433458
434
-/*
459
+/* stext(TYPE, RID, ARG)
460
+**
435461
** This is an SQLite function that computes the searchable text.
436462
** It is a wrapper around the search_stext() routine. See the
437463
** search_stext() routine for further detail.
438464
*/
439465
static void search_stext_sqlfunc(
@@ -445,10 +471,15 @@
445471
int rid = sqlite3_value_int(argv[1]);
446472
const char *zName = (const char*)sqlite3_value_text(argv[2]);
447473
sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1,
448474
SQLITE_TRANSIENT);
449475
}
476
+
477
+/* title(TYPE, RID, ARG)
478
+**
479
+** Return the title of the document to be searched.
480
+*/
450481
static void search_title_sqlfunc(
451482
sqlite3_context *context,
452483
int argc,
453484
sqlite3_value **argv
454485
){
@@ -461,10 +492,15 @@
461492
sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT);
462493
}else{
463494
sqlite3_result_value(context, argv[2]);
464495
}
465496
}
497
+
498
+/* body(TYPE, RID, ARG)
499
+**
500
+** Return the body of the document to be search.
501
+*/
466502
static void search_body_sqlfunc(
467503
sqlite3_context *context,
468504
int argc,
469505
sqlite3_value **argv
470506
){
@@ -474,12 +510,14 @@
474510
int nHdr = 0;
475511
char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
476512
sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT);
477513
}
478514
479
-/*
480
-** Encode a string for use as a query parameter in a URL
515
+/* urlencode(X)
516
+**
517
+** Encode a string for use as a query parameter in a URL. This is
518
+** the equivalent of printf("%T",X).
481519
*/
482520
static void search_urlencode_sqlfunc(
483521
sqlite3_context *context,
484522
int argc,
485523
sqlite3_value **argv
@@ -487,13 +525,12 @@
487525
char *z = mprintf("%T",sqlite3_value_text(argv[0]));
488526
sqlite3_result_text(context, z, -1, fossil_free);
489527
}
490528
491529
/*
492
-** Register the "score()" SQL function to score its input text
493
-** using the given Search object. Once this function is registered,
494
-** do not delete the Search object.
530
+** Register the various SQL functions (defined above) needed to implement
531
+** full-scan search.
495532
*/
496533
void search_sql_setup(sqlite3 *db){
497534
static int once = 0;
498535
if( once++ ) return;
499536
sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0,
@@ -636,16 +673,22 @@
636673
}
637674
638675
/*
639676
** When this routine is called, there already exists a table
640677
**
641
-** x(label,url,score,date,snip).
678
+** x(label,url,score,id,date,snip).
679
+**
680
+** label: The "name" of the document containing the match
681
+** url: A URL for the document
682
+** score: How well the document matched
683
+** id: The document id. Format: xNNNNN, x: type, N: number
684
+** snip: A snippet for the match
642685
**
643686
** And the srchFlags parameter has been validated. This routine
644
-** fills the X table with search results using a full-text scan.
687
+** fills the X table with search results using a full-scan search.
645688
**
646
-** The companion indexed scan routine is search_indexed().
689
+** The companion indexed search routine is search_indexed().
647690
*/
648691
static void search_fullscan(
649692
const char *zPattern, /* The query pattern */
650693
unsigned int srchFlags /* What to search over */
651694
){
@@ -805,16 +848,22 @@
805848
}
806849
807850
/*
808851
** When this routine is called, there already exists a table
809852
**
810
-** x(label,url,score,date,snip).
853
+** x(label,url,score,id,date,snip).
854
+**
855
+** label: The "name" of the document containing the match
856
+** url: A URL for the document
857
+** score: How well the document matched
858
+** id: The document id. Format: xNNNNN, x: type, N: number
859
+** snip: A snippet for the match
811860
**
812861
** And the srchFlags parameter has been validated. This routine
813
-** fills the X table with search results using a index scan.
862
+** fills the X table with search results using FTS indexed search.
814863
**
815
-** The companion full-text scan routine is search_fullscan().
864
+** The companion full-scan search routine is search_fullscan().
816865
*/
817866
static void search_indexed(
818867
const char *zPattern, /* The query pattern */
819868
unsigned int srchFlags /* What to search over */
820869
){
@@ -910,10 +959,14 @@
910959
911960
/*
912961
** This routine generates web-page output for a search operation.
913962
** Other web-pages can invoke this routine to add search results
914963
** in the middle of the page.
964
+**
965
+** This routine works for both full-scan and indexed search. The
966
+** appropriate low-level search routine is called according to the
967
+** current configuration.
915968
**
916969
** Return the number of rows.
917970
*/
918971
int search_run_and_output(
919972
const char *zPattern, /* The query pattern */
@@ -930,14 +983,14 @@
930983
add_content_sql_commands(g.db);
931984
db_multi_exec(
932985
"CREATE TEMP TABLE x(label,url,score,id,date,snip);"
933986
);
934987
if( !search_index_exists() ){
935
- search_fullscan(zPattern, srchFlags);
988
+ search_fullscan(zPattern, srchFlags); /* Full-scan search */
936989
}else{
937
- search_update_index(srchFlags);
938
- search_indexed(zPattern, srchFlags);
990
+ search_update_index(srchFlags); /* Update the index, if necessary */
991
+ search_indexed(zPattern, srchFlags); /* Indexed search */
939992
}
940993
db_prepare(&q, "SELECT url, snip, label, score, id"
941994
" FROM x"
942995
" ORDER BY score DESC, date DESC;");
943996
while( db_step(&q)==SQLITE_ROW ){
@@ -1185,11 +1238,14 @@
11851238
** t Ticket text
11861239
**
11871240
** rid The RID of an artifact that defines the object
11881241
** being searched.
11891242
**
1190
-** zName Name of the object being searched.
1243
+** zName Name of the object being searched. This is used
1244
+** only to help figure out the mimetype (text/plain,
1245
+** text/html, text/x-fossil-wiki, or text/x-markdown)
1246
+** so that the code can know how to simplify the text.
11911247
*/
11921248
void search_stext(
11931249
char cType, /* Type of document */
11941250
int rid, /* BLOB.RID or TAG.TAGID value for document */
11951251
const char *zName, /* Auxiliary information */
@@ -1288,11 +1344,11 @@
12881344
** with an eType of 0 to clear the cache.
12891345
*/
12901346
char *search_stext_cached(
12911347
char cType, /* Type of document */
12921348
int rid, /* BLOB.RID or TAG.TAGID value for document */
1293
- const char *zName, /* Auxiliary information */
1349
+ const char *zName, /* Auxiliary information, for mimetype */
12941350
int *pnTitle /* OUT: length of title in bytes excluding \n */
12951351
){
12961352
static struct {
12971353
Blob stext; /* Cached search text */
12981354
char cType; /* The type */
@@ -1320,11 +1376,16 @@
13201376
}
13211377
13221378
/*
13231379
** COMMAND: test-search-stext
13241380
**
1325
-** Usage: fossil test-search-stext TYPE ARG1 ARG2
1381
+** Usage: fossil test-search-stext TYPE RID NAME
1382
+**
1383
+** Compute the search text for document TYPE-RID whose name is NAME.
1384
+** The TYPE is one of "c", "d", "t", or "w". The RID is the document
1385
+** ID. The NAME is used to figure out a mimetype to use for formatting
1386
+** the raw document text.
13261387
*/
13271388
void test_search_stext(void){
13281389
Blob out;
13291390
db_find_and_open_repository(0,0);
13301391
if( g.argc!=5 ) usage("TYPE RID NAME");
@@ -1742,5 +1803,96 @@
17421803
}else{
17431804
fossil_print("%-16s disabled\n", "full-text index:");
17441805
}
17451806
db_end_transaction(0);
17461807
}
1808
+
1809
+/*
1810
+** WEBPAGE: test-ftsdocs
1811
+**
1812
+** Show a table of all documents currently in the search index.
1813
+*/
1814
+void search_data_page(void){
1815
+ Stmt q;
1816
+ const char *zId = P("id");
1817
+ const char *zType = P("y");
1818
+ const char *zIdxed = P("ixed");
1819
+ int id;
1820
+ int cnt = 0;
1821
+ login_check_credentials();
1822
+ if( !g.perm.Admin ){ login_needed(0); return; }
1823
+ if( !search_index_exists() ){
1824
+ @ <p>Indexed search is disabled
1825
+ style_footer();
1826
+ return;
1827
+ }
1828
+ if( zId!=0 && (id = atoi(zId))>0 ){
1829
+ /* Show information about a single ftsdocs entry */
1830
+ style_header("Information about ftsdoc entry %d", id);
1831
+ db_prepare(&q,
1832
+ "SELECT type||rid, name, idxed, label, url, datetime(mtime)"
1833
+ " FROM ftsdocs WHERE rowid=%d", id
1834
+ );
1835
+ if( db_step(&q)==SQLITE_ROW ){
1836
+ const char *zUrl = db_column_text(&q,4);
1837
+ @ <table border=0>
1838
+ @ <tr><td align='right'>rowid:<td>&nbsp;&nbsp;<td>%d(id)
1839
+ @ <tr><td align='right'>id:<td><td>%s(db_column_text(&q,0))
1840
+ @ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1))
1841
+ @ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2))
1842
+ @ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3))
1843
+ @ <tr><td align='right'>url:<td><td>
1844
+ @ <a href='%R%s(zUrl)'>%h(zUrl)</a>
1845
+ @ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5))
1846
+ @ </table>
1847
+ }
1848
+ db_finalize(&q);
1849
+ style_footer();
1850
+ return;
1851
+ }
1852
+ if( zType!=0 && zType[0]!=0 && zType[1]==0 &&
1853
+ zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0
1854
+ ){
1855
+ int ixed = zIdxed[0]=='1';
1856
+ style_header("List of '%c' documents that are%s indexed",
1857
+ zType[0], ixed ? "" : " not");
1858
+ db_prepare(&q,
1859
+ "SELECT rowid, type||rid ||' '|| coalesce(label,'')"
1860
+ " FROM ftsdocs WHERE type='%c' AND %s idxed",
1861
+ zType[0], ixed ? "" : "NOT"
1862
+ );
1863
+ @ <ul>
1864
+ while( db_step(&q)==SQLITE_ROW ){
1865
+ @ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'>
1866
+ @ %h(db_column_text(&q,1))</a>
1867
+ }
1868
+ @ </ul>
1869
+ db_finalize(&q);
1870
+ style_footer();
1871
+ return;
1872
+ }
1873
+ style_header("Summary of ftsdocs");
1874
+ db_prepare(&q,
1875
+ "SELECT type, idxed, count(*) FROM ftsdocs"
1876
+ " GROUP BY 1, 2 ORDER BY 3 DESC"
1877
+ );
1878
+ @ <table border=1 cellpadding=3 cellspacing=0>
1879
+ @ <thead>
1880
+ @ <tr><th>Type<th>Indexed?<th>Count<th>Link
1881
+ @ </thead>
1882
+ @ <tbody>
1883
+ while( db_step(&q)==SQLITE_ROW ){
1884
+ const char *zType = db_column_text(&q,0);
1885
+ int idxed = db_column_int(&q,1);
1886
+ int n = db_column_int(&q,2);
1887
+ @ <tr><td>%h(zType)<td>%d(idxed)
1888
+ @ <td>%d(n)
1889
+ @ <td><a href='test-ftsdocs?y=%s(zType)&ixed=%d(idxed)'>listing</a>
1890
+ @ </tr>
1891
+ cnt += n;
1892
+ }
1893
+ @ </tbody><tfooter>
1894
+ @ <tr><th>Total<th><th>%d(cnt)<th>
1895
+ @ </tfooter>
1896
+ @ </table>
1897
+ style_footer();
1898
+}
17471899
--- src/search.c
+++ src/search.c
@@ -13,29 +13,38 @@
13 ** [email protected]
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code to implement a very simple search function
19 ** against timeline comments, check-in content, wiki pages, and/or tickets.
20 **
21 ** The search is full-text like in that it is looking for words and ignores
22 ** punctuation and capitalization. But it is more akin to "grep" in that
23 ** it scans the entire corpus for the search, and it does not support the
24 ** full functionality of FTS4.
 
 
 
 
 
 
 
 
 
25 */
26 #include "config.h"
27 #include "search.h"
28 #include <assert.h>
29
30 #if INTERFACE
31
32 /* Maximum number of search terms */
33 #define SEARCH_MAX_TERM 8
34
35 /*
36 ** A compiled search pattern
37 */
38 struct Search {
39 int nTerm; /* Number of search terms */
40 struct srchTerm { /* For each search term */
41 char *z; /* Text */
@@ -85,11 +94,11 @@
85 };
86 #define ISALNUM(x) (!isBoundary[(x)&0xff])
87
88
89 /*
90 ** Destroy a search context.
91 */
92 void search_end(Search *p){
93 if( p ){
94 fossil_free(p->zPattern);
95 fossil_free(p->zMarkBegin);
@@ -100,11 +109,11 @@
100 if( p!=&gSearch ) fossil_free(p);
101 }
102 }
103
104 /*
105 ** Compile a search pattern
106 */
107 static Search *search_init(
108 const char *zPattern, /* The search pattern */
109 const char *zMarkBegin, /* Start of a match */
110 const char *zMarkEnd, /* End of a match */
@@ -157,11 +166,12 @@
157 blob_append(pSnip, zTxt, n);
158 }
159 }
160 }
161
162 /*
 
163 ** Compare a search pattern against one or more input strings which
164 ** collectively comprise a document. Return a match score. Any
165 ** positive value means there was a match. Zero means that one or
166 ** more terms are missing.
167 **
@@ -318,10 +328,13 @@
318
319 /*
320 ** COMMAND: test-match
321 **
322 ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ...
 
 
 
323 */
324 void test_match_cmd(void){
325 Search *p;
326 int i;
327 Blob x;
@@ -351,15 +364,19 @@
351 }
352 search_end(p);
353 }
354
355 /*
356 ** An SQL function to initialize the global search pattern:
357 **
358 ** search_init(PATTERN,BEGIN,END,GAP,FLAGS)
359 **
360 ** All arguments are optional.
 
 
 
 
361 */
362 static void search_init_sqlfunc(
363 sqlite3_context *context,
364 int argc,
365 sqlite3_value **argv
@@ -386,13 +403,15 @@
386 }else{
387 search_end(&gSearch);
388 }
389 }
390
391 /*
392 ** Try to match the input text against the search parameters set up
393 ** by the previous search_init() call. Remember the results globally.
 
 
394 ** Return non-zero on a match and zero on a miss.
395 */
396 static void search_match_sqlfunc(
397 sqlite3_context *context,
398 int argc,
@@ -407,21 +426,27 @@
407 }
408 rc = search_match(&gSearch, nDoc, azDoc);
409 sqlite3_result_int(context, rc);
410 }
411
412 /*
413 ** These SQL functions return the results of the last
414 ** call to the search_match() SQL function.
 
415 */
416 static void search_score_sqlfunc(
417 sqlite3_context *context,
418 int argc,
419 sqlite3_value **argv
420 ){
421 sqlite3_result_int(context, gSearch.iScore);
422 }
 
 
 
 
 
423 static void search_snippet_sqlfunc(
424 sqlite3_context *context,
425 int argc,
426 sqlite3_value **argv
427 ){
@@ -429,11 +454,12 @@
429 sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free);
430 blob_init(&gSearch.snip, 0, 0);
431 }
432 }
433
434 /*
 
435 ** This is an SQLite function that computes the searchable text.
436 ** It is a wrapper around the search_stext() routine. See the
437 ** search_stext() routine for further detail.
438 */
439 static void search_stext_sqlfunc(
@@ -445,10 +471,15 @@
445 int rid = sqlite3_value_int(argv[1]);
446 const char *zName = (const char*)sqlite3_value_text(argv[2]);
447 sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1,
448 SQLITE_TRANSIENT);
449 }
 
 
 
 
 
450 static void search_title_sqlfunc(
451 sqlite3_context *context,
452 int argc,
453 sqlite3_value **argv
454 ){
@@ -461,10 +492,15 @@
461 sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT);
462 }else{
463 sqlite3_result_value(context, argv[2]);
464 }
465 }
 
 
 
 
 
466 static void search_body_sqlfunc(
467 sqlite3_context *context,
468 int argc,
469 sqlite3_value **argv
470 ){
@@ -474,12 +510,14 @@
474 int nHdr = 0;
475 char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
476 sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT);
477 }
478
479 /*
480 ** Encode a string for use as a query parameter in a URL
 
 
481 */
482 static void search_urlencode_sqlfunc(
483 sqlite3_context *context,
484 int argc,
485 sqlite3_value **argv
@@ -487,13 +525,12 @@
487 char *z = mprintf("%T",sqlite3_value_text(argv[0]));
488 sqlite3_result_text(context, z, -1, fossil_free);
489 }
490
491 /*
492 ** Register the "score()" SQL function to score its input text
493 ** using the given Search object. Once this function is registered,
494 ** do not delete the Search object.
495 */
496 void search_sql_setup(sqlite3 *db){
497 static int once = 0;
498 if( once++ ) return;
499 sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0,
@@ -636,16 +673,22 @@
636 }
637
638 /*
639 ** When this routine is called, there already exists a table
640 **
641 ** x(label,url,score,date,snip).
 
 
 
 
 
 
642 **
643 ** And the srchFlags parameter has been validated. This routine
644 ** fills the X table with search results using a full-text scan.
645 **
646 ** The companion indexed scan routine is search_indexed().
647 */
648 static void search_fullscan(
649 const char *zPattern, /* The query pattern */
650 unsigned int srchFlags /* What to search over */
651 ){
@@ -805,16 +848,22 @@
805 }
806
807 /*
808 ** When this routine is called, there already exists a table
809 **
810 ** x(label,url,score,date,snip).
 
 
 
 
 
 
811 **
812 ** And the srchFlags parameter has been validated. This routine
813 ** fills the X table with search results using a index scan.
814 **
815 ** The companion full-text scan routine is search_fullscan().
816 */
817 static void search_indexed(
818 const char *zPattern, /* The query pattern */
819 unsigned int srchFlags /* What to search over */
820 ){
@@ -910,10 +959,14 @@
910
911 /*
912 ** This routine generates web-page output for a search operation.
913 ** Other web-pages can invoke this routine to add search results
914 ** in the middle of the page.
 
 
 
 
915 **
916 ** Return the number of rows.
917 */
918 int search_run_and_output(
919 const char *zPattern, /* The query pattern */
@@ -930,14 +983,14 @@
930 add_content_sql_commands(g.db);
931 db_multi_exec(
932 "CREATE TEMP TABLE x(label,url,score,id,date,snip);"
933 );
934 if( !search_index_exists() ){
935 search_fullscan(zPattern, srchFlags);
936 }else{
937 search_update_index(srchFlags);
938 search_indexed(zPattern, srchFlags);
939 }
940 db_prepare(&q, "SELECT url, snip, label, score, id"
941 " FROM x"
942 " ORDER BY score DESC, date DESC;");
943 while( db_step(&q)==SQLITE_ROW ){
@@ -1185,11 +1238,14 @@
1185 ** t Ticket text
1186 **
1187 ** rid The RID of an artifact that defines the object
1188 ** being searched.
1189 **
1190 ** zName Name of the object being searched.
 
 
 
1191 */
1192 void search_stext(
1193 char cType, /* Type of document */
1194 int rid, /* BLOB.RID or TAG.TAGID value for document */
1195 const char *zName, /* Auxiliary information */
@@ -1288,11 +1344,11 @@
1288 ** with an eType of 0 to clear the cache.
1289 */
1290 char *search_stext_cached(
1291 char cType, /* Type of document */
1292 int rid, /* BLOB.RID or TAG.TAGID value for document */
1293 const char *zName, /* Auxiliary information */
1294 int *pnTitle /* OUT: length of title in bytes excluding \n */
1295 ){
1296 static struct {
1297 Blob stext; /* Cached search text */
1298 char cType; /* The type */
@@ -1320,11 +1376,16 @@
1320 }
1321
1322 /*
1323 ** COMMAND: test-search-stext
1324 **
1325 ** Usage: fossil test-search-stext TYPE ARG1 ARG2
 
 
 
 
 
1326 */
1327 void test_search_stext(void){
1328 Blob out;
1329 db_find_and_open_repository(0,0);
1330 if( g.argc!=5 ) usage("TYPE RID NAME");
@@ -1742,5 +1803,96 @@
1742 }else{
1743 fossil_print("%-16s disabled\n", "full-text index:");
1744 }
1745 db_end_transaction(0);
1746 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1747
--- src/search.c
+++ src/search.c
@@ -13,29 +13,38 @@
13 ** [email protected]
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code to implement search functions
19 ** against timeline comments, check-in content, wiki pages, and/or tickets.
20 **
21 ** The search can be either a per-query "grep"-like search that scans
22 ** the entire corpus. Or it can use the FTS4 or FTS5 search engine of
23 ** SQLite. The choice is an administrator configuration option.
24 **
25 ** The first option is referred to as "full-scan search". The second
26 ** option is called "indexed search".
27 **
28 ** The code in this file is ordered approximately as follows:
29 **
30 ** (1) The full-scan search engine
31 ** (2) The indexed search engine
32 ** (3) Higher level interfaces that use either (1) or (2) according
33 ** to the current search configuration settings
34 */
35 #include "config.h"
36 #include "search.h"
37 #include <assert.h>
38
39 #if INTERFACE
40
41 /* Maximum number of search terms for full-scan search */
42 #define SEARCH_MAX_TERM 8
43
44 /*
45 ** A compiled search pattern used for full-scan search.
46 */
47 struct Search {
48 int nTerm; /* Number of search terms */
49 struct srchTerm { /* For each search term */
50 char *z; /* Text */
@@ -85,11 +94,11 @@
94 };
95 #define ISALNUM(x) (!isBoundary[(x)&0xff])
96
97
98 /*
99 ** Destroy a full-scan search context.
100 */
101 void search_end(Search *p){
102 if( p ){
103 fossil_free(p->zPattern);
104 fossil_free(p->zMarkBegin);
@@ -100,11 +109,11 @@
109 if( p!=&gSearch ) fossil_free(p);
110 }
111 }
112
113 /*
114 ** Compile a full-scan search pattern
115 */
116 static Search *search_init(
117 const char *zPattern, /* The search pattern */
118 const char *zMarkBegin, /* Start of a match */
119 const char *zMarkEnd, /* End of a match */
@@ -157,11 +166,12 @@
166 blob_append(pSnip, zTxt, n);
167 }
168 }
169 }
170
171 /* This is the core search engine for full-scan search.
172 **
173 ** Compare a search pattern against one or more input strings which
174 ** collectively comprise a document. Return a match score. Any
175 ** positive value means there was a match. Zero means that one or
176 ** more terms are missing.
177 **
@@ -318,10 +328,13 @@
328
329 /*
330 ** COMMAND: test-match
331 **
332 ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ...
333 **
334 ** Run the full-scan search algorithm using SEARCHSTRING against
335 ** the text of the files listed. Output matches and snippets.
336 */
337 void test_match_cmd(void){
338 Search *p;
339 int i;
340 Blob x;
@@ -351,15 +364,19 @@
364 }
365 search_end(p);
366 }
367
368 /*
369 ** An SQL function to initialize the full-scan search pattern:
370 **
371 ** search_init(PATTERN,BEGIN,END,GAP,FLAGS)
372 **
373 ** All arguments are optional. PATTERN is the search pattern. If it
374 ** is omitted, then the global search pattern is reset. BEGIN and END
375 ** and GAP are the strings used to construct snippets. FLAGS is an
376 ** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC,
377 ** SRCH_TKT, or SRCH_ALL bits to determine what is to be searched.
378 */
379 static void search_init_sqlfunc(
380 sqlite3_context *context,
381 int argc,
382 sqlite3_value **argv
@@ -386,13 +403,15 @@
403 }else{
404 search_end(&gSearch);
405 }
406 }
407
408 /* search_match(TEXT, TEXT, ....)
409 **
410 ** Using the full-scan search engine created by the most recent call
411 ** to search_init(), match the input TEXT arguments.
412 ** Remember the results in the global full-scan search object.
413 ** Return non-zero on a match and zero on a miss.
414 */
415 static void search_match_sqlfunc(
416 sqlite3_context *context,
417 int argc,
@@ -407,21 +426,27 @@
426 }
427 rc = search_match(&gSearch, nDoc, azDoc);
428 sqlite3_result_int(context, rc);
429 }
430
431
432 /* search_score()
433 **
434 ** Return the match score for the last successful search_match call.
435 */
436 static void search_score_sqlfunc(
437 sqlite3_context *context,
438 int argc,
439 sqlite3_value **argv
440 ){
441 sqlite3_result_int(context, gSearch.iScore);
442 }
443
444 /* search_snippet()
445 **
446 ** Return a snippet for the last successful search_match() call.
447 */
448 static void search_snippet_sqlfunc(
449 sqlite3_context *context,
450 int argc,
451 sqlite3_value **argv
452 ){
@@ -429,11 +454,12 @@
454 sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free);
455 blob_init(&gSearch.snip, 0, 0);
456 }
457 }
458
459 /* stext(TYPE, RID, ARG)
460 **
461 ** This is an SQLite function that computes the searchable text.
462 ** It is a wrapper around the search_stext() routine. See the
463 ** search_stext() routine for further detail.
464 */
465 static void search_stext_sqlfunc(
@@ -445,10 +471,15 @@
471 int rid = sqlite3_value_int(argv[1]);
472 const char *zName = (const char*)sqlite3_value_text(argv[2]);
473 sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1,
474 SQLITE_TRANSIENT);
475 }
476
477 /* title(TYPE, RID, ARG)
478 **
479 ** Return the title of the document to be searched.
480 */
481 static void search_title_sqlfunc(
482 sqlite3_context *context,
483 int argc,
484 sqlite3_value **argv
485 ){
@@ -461,10 +492,15 @@
492 sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT);
493 }else{
494 sqlite3_result_value(context, argv[2]);
495 }
496 }
497
498 /* body(TYPE, RID, ARG)
499 **
500 ** Return the body of the document to be searched.
501 */
502 static void search_body_sqlfunc(
503 sqlite3_context *context,
504 int argc,
505 sqlite3_value **argv
506 ){
@@ -474,12 +510,14 @@
510 int nHdr = 0;
511 char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
512 sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT);
513 }
514
515 /* urlencode(X)
516 **
517 ** Encode a string for use as a query parameter in a URL. This is
518 ** the equivalent of printf("%T",X).
519 */
520 static void search_urlencode_sqlfunc(
521 sqlite3_context *context,
522 int argc,
523 sqlite3_value **argv
@@ -487,13 +525,12 @@
525 char *z = mprintf("%T",sqlite3_value_text(argv[0]));
526 sqlite3_result_text(context, z, -1, fossil_free);
527 }
528
529 /*
530 ** Register the various SQL functions (defined above) needed to implement
531 ** full-scan search.
 
532 */
533 void search_sql_setup(sqlite3 *db){
534 static int once = 0;
535 if( once++ ) return;
536 sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0,
@@ -636,16 +673,22 @@
673 }
674
675 /*
676 ** When this routine is called, there already exists a table
677 **
678 ** x(label,url,score,id,date,snip).
679 **
680 ** label: The "name" of the document containing the match
681 ** url: A URL for the document
682 ** score: How well the document matched
683 ** id: The document id. Format: xNNNNN, x: type, N: number
684 ** snip: A snippet for the match
685 **
686 ** And the srchFlags parameter has been validated. This routine
687 ** fills the X table with search results using a full-scan search.
688 **
689 ** The companion indexed search routine is search_indexed().
690 */
691 static void search_fullscan(
692 const char *zPattern, /* The query pattern */
693 unsigned int srchFlags /* What to search over */
694 ){
@@ -805,16 +848,22 @@
848 }
849
850 /*
851 ** When this routine is called, there already exists a table
852 **
853 ** x(label,url,score,id,date,snip).
854 **
855 ** label: The "name" of the document containing the match
856 ** url: A URL for the document
857 ** score: How well the document matched
858 ** id: The document id. Format: xNNNNN, x: type, N: number
859 ** snip: A snippet for the match
860 **
861 ** And the srchFlags parameter has been validated. This routine
862 ** fills the X table with search results using FTS indexed search.
863 **
864 ** The companion full-scan search routine is search_fullscan().
865 */
866 static void search_indexed(
867 const char *zPattern, /* The query pattern */
868 unsigned int srchFlags /* What to search over */
869 ){
@@ -910,10 +959,14 @@
959
960 /*
961 ** This routine generates web-page output for a search operation.
962 ** Other web-pages can invoke this routine to add search results
963 ** in the middle of the page.
964 **
965 ** This routine works for both full-scan and indexed search. The
966 ** appropriate low-level search routine is called according to the
967 ** current configuration.
968 **
969 ** Return the number of rows.
970 */
971 int search_run_and_output(
972 const char *zPattern, /* The query pattern */
@@ -930,14 +983,14 @@
983 add_content_sql_commands(g.db);
984 db_multi_exec(
985 "CREATE TEMP TABLE x(label,url,score,id,date,snip);"
986 );
987 if( !search_index_exists() ){
988 search_fullscan(zPattern, srchFlags); /* Full-scan search */
989 }else{
990 search_update_index(srchFlags); /* Update the index, if necessary */
991 search_indexed(zPattern, srchFlags); /* Indexed search */
992 }
993 db_prepare(&q, "SELECT url, snip, label, score, id"
994 " FROM x"
995 " ORDER BY score DESC, date DESC;");
996 while( db_step(&q)==SQLITE_ROW ){
@@ -1185,11 +1238,14 @@
1238 ** t Ticket text
1239 **
1240 ** rid The RID of an artifact that defines the object
1241 ** being searched.
1242 **
1243 ** zName Name of the object being searched. This is used
1244 ** only to help figure out the mimetype (text/plain,
1245 ** text/html, text/x-fossil-wiki, or text/x-markdown)
1246 ** so that the code can know how to simplify the text.
1247 */
1248 void search_stext(
1249 char cType, /* Type of document */
1250 int rid, /* BLOB.RID or TAG.TAGID value for document */
1251 const char *zName, /* Auxiliary information */
@@ -1288,11 +1344,11 @@
1344 ** with a cType of 0 to clear the cache.
1345 */
1346 char *search_stext_cached(
1347 char cType, /* Type of document */
1348 int rid, /* BLOB.RID or TAG.TAGID value for document */
1349 const char *zName, /* Auxiliary information, for mimetype */
1350 int *pnTitle /* OUT: length of title in bytes excluding \n */
1351 ){
1352 static struct {
1353 Blob stext; /* Cached search text */
1354 char cType; /* The type */
@@ -1320,11 +1376,16 @@
1376 }
1377
1378 /*
1379 ** COMMAND: test-search-stext
1380 **
1381 ** Usage: fossil test-search-stext TYPE RID NAME
1382 **
1383 ** Compute the search text for document TYPE-RID whose name is NAME.
1384 ** The TYPE is one of "c", "d", "t", or "w". The RID is the document
1385 ** ID. The NAME is used to figure out a mimetype to use for formatting
1386 ** the raw document text.
1387 */
1388 void test_search_stext(void){
1389 Blob out;
1390 db_find_and_open_repository(0,0);
1391 if( g.argc!=5 ) usage("TYPE RID NAME");
@@ -1742,5 +1803,96 @@
1803 }else{
1804 fossil_print("%-16s disabled\n", "full-text index:");
1805 }
1806 db_end_transaction(0);
1807 }
1808
1809 /*
1810 ** WEBPAGE: test-ftsdocs
1811 **
1812 ** Show a table of all documents currently in the search index.
1813 */
1814 void search_data_page(void){
1815 Stmt q;
1816 const char *zId = P("id");
1817 const char *zType = P("y");
1818 const char *zIdxed = P("ixed");
1819 int id;
1820 int cnt = 0;
1821 login_check_credentials();
1822 if( !g.perm.Admin ){ login_needed(0); return; }
1823 if( !search_index_exists() ){
1824 @ <p>Indexed search is disabled
1825 style_footer();
1826 return;
1827 }
1828 if( zId!=0 && (id = atoi(zId))>0 ){
1829 /* Show information about a single ftsdocs entry */
1830 style_header("Information about ftsdoc entry %d", id);
1831 db_prepare(&q,
1832 "SELECT type||rid, name, idxed, label, url, datetime(mtime)"
1833 " FROM ftsdocs WHERE rowid=%d", id
1834 );
1835 if( db_step(&q)==SQLITE_ROW ){
1836 const char *zUrl = db_column_text(&q,4);
1837 @ <table border=0>
1838 @ <tr><td align='right'>rowid:<td>&nbsp;&nbsp;<td>%d(id)
1839 @ <tr><td align='right'>id:<td><td>%s(db_column_text(&q,0))
1840 @ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1))
1841 @ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2))
1842 @ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3))
1843 @ <tr><td align='right'>url:<td><td>
1844 @ <a href='%R%s(zUrl)'>%h(zUrl)</a>
1845 @ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5))
1846 @ </table>
1847 }
1848 db_finalize(&q);
1849 style_footer();
1850 return;
1851 }
1852 if( zType!=0 && zType[0]!=0 && zType[1]==0 &&
1853 zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0
1854 ){
1855 int ixed = zIdxed[0]=='1';
1856 style_header("List of '%c' documents that are%s indexed",
1857 zType[0], ixed ? "" : " not");
1858 db_prepare(&q,
1859 "SELECT rowid, type||rid ||' '|| coalesce(label,'')"
1860 " FROM ftsdocs WHERE type='%c' AND %s idxed",
1861 zType[0], ixed ? "" : "NOT"
1862 );
1863 @ <ul>
1864 while( db_step(&q)==SQLITE_ROW ){
1865 @ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'>
1866 @ %h(db_column_text(&q,1))</a>
1867 }
1868 @ </ul>
1869 db_finalize(&q);
1870 style_footer();
1871 return;
1872 }
1873 style_header("Summary of ftsdocs");
1874 db_prepare(&q,
1875 "SELECT type, idxed, count(*) FROM ftsdocs"
1876 " GROUP BY 1, 2 ORDER BY 3 DESC"
1877 );
1878 @ <table border=1 cellpadding=3 cellspacing=0>
1879 @ <thead>
1880 @ <tr><th>Type<th>Indexed?<th>Count<th>Link
1881 @ </thead>
1882 @ <tbody>
1883 while( db_step(&q)==SQLITE_ROW ){
1884 const char *zType = db_column_text(&q,0);
1885 int idxed = db_column_int(&q,1);
1886 int n = db_column_int(&q,2);
1887 @ <tr><td>%h(zType)<td>%d(idxed)
1888 @ <td>%d(n)
1889 @ <td><a href='test-ftsdocs?y=%s(zType)&ixed=%d(idxed)'>listing</a>
1890 @ </tr>
1891 cnt += n;
1892 }
1893 @ </tbody><tfooter>
1894 @ <tr><th>Total<th><th>%d(cnt)<th>
1895 @ </tfooter>
1896 @ </table>
1897 style_footer();
1898 }
1899

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button