Fossil SCM
Add the /test-ftsdocs page, accessible only by administrators. Improved comments in the search logic.
Commit
375bc71ea6d10cfb3af23f76a66d9c06a6f9792e
Parent
fa947eebfdcd782…
1 file changed
+188
-36
+188
-36
| --- src/search.c | ||
| +++ src/search.c | ||
| @@ -13,29 +13,38 @@ | ||
| 13 | 13 | ** [email protected] |
| 14 | 14 | ** http://www.hwaci.com/drh/ |
| 15 | 15 | ** |
| 16 | 16 | ******************************************************************************* |
| 17 | 17 | ** |
| 18 | -** This file contains code to implement a very simple search function | |
| 18 | +** This file contains code to implement search functions | |
| 19 | 19 | ** against timeline comments, check-in content, wiki pages, and/or tickets. |
| 20 | 20 | ** |
| 21 | -** The search is full-text like in that it is looking for words and ignores | |
| 22 | -** punctuation and capitalization. But it is more akin to "grep" in that | |
| 23 | -** it scans the entire corpus for the search, and it does not support the | |
| 24 | -** full functionality of FTS4. | |
| 21 | +** The search can be either a per-query "grep"-like search that scans | |
| 22 | +** the entire corpus. Or it can use the FTS4 or FTS5 search engine of | |
| 23 | +** SQLite. The choice is an administrator configuration option. | |
| 24 | +** | |
| 25 | +** The first option is referred to as "full-scan search". The second | |
| 26 | +** option is called "indexed search". | |
| 27 | +** | |
| 28 | +** The code in this file is ordered approximately as follows: | |
| 29 | +** | |
| 30 | +** (1) The full-scan search engine | |
| 31 | +** (2) The indexed search engine | |
| 32 | +** (3) Higher level interfaces that use either (1) or (2) according | |
| 33 | +** to the current search configuration settings | |
| 25 | 34 | */ |
| 26 | 35 | #include "config.h" |
| 27 | 36 | #include "search.h" |
| 28 | 37 | #include <assert.h> |
| 29 | 38 | |
| 30 | 39 | #if INTERFACE |
| 31 | 40 | |
| 32 | -/* Maximum number of search terms */ | |
| 41 | +/* Maximum number of search terms for full-scan search */ | |
| 33 | 42 | #define SEARCH_MAX_TERM 8 |
| 34 | 43 | |
| 35 | 44 | /* |
| 36 | -** A compiled search pattern | |
| 45 | +** A compiled search pattern used for full-scan search. | |
| 37 | 46 | */ |
| 38 | 47 | struct Search { |
| 39 | 48 | int nTerm; /* Number of search terms */ |
| 40 | 49 | struct srchTerm { /* For each search term */ |
| 41 | 50 | char *z; /* Text */ |
| @@ -85,11 +94,11 @@ | ||
| 85 | 94 | }; |
| 86 | 95 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 87 | 96 | |
| 88 | 97 | |
| 89 | 98 | /* |
| 90 | -** Destroy a search context. | |
| 99 | +** Destroy a full-scan search context. | |
| 91 | 100 | */ |
| 92 | 101 | void search_end(Search *p){ |
| 93 | 102 | if( p ){ |
| 94 | 103 | fossil_free(p->zPattern); |
| 95 | 104 | fossil_free(p->zMarkBegin); |
| @@ -100,11 +109,11 @@ | ||
| 100 | 109 | if( p!=&gSearch ) fossil_free(p); |
| 101 | 110 | } |
| 102 | 111 | } |
| 103 | 112 | |
| 104 | 113 | /* |
| 105 | -** Compile a search pattern | |
| 114 | +** Compile a full-scan search pattern | |
| 106 | 115 | */ |
| 107 | 116 | static Search *search_init( |
| 108 | 117 | const char *zPattern, /* The search pattern */ |
| 109 | 118 | const char *zMarkBegin, /* Start of a match */ |
| 110 | 119 | const char *zMarkEnd, /* End of a match */ |
| @@ -157,11 +166,12 @@ | ||
| 157 | 166 | blob_append(pSnip, zTxt, n); |
| 158 | 167 | } |
| 159 | 168 | } |
| 160 | 169 | } |
| 161 | 170 | |
| 162 | -/* | |
| 171 | +/* This is the core search engine for full-scan search. | |
| 172 | +** | |
| 163 | 173 | ** Compare a search pattern against one or more input strings which |
| 164 | 174 | ** collectively comprise a document. Return a match score. Any |
| 165 | 175 | ** positive value means there was a match. Zero means that one or |
| 166 | 176 | ** more terms are missing. |
| 167 | 177 | ** |
| @@ -318,10 +328,13 @@ | ||
| 318 | 328 | |
| 319 | 329 | /* |
| 320 | 330 | ** COMMAND: test-match |
| 321 | 331 | ** |
| 322 | 332 | ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ... |
| 333 | +** | |
| 334 | +** Run the full-scan search algorithm using SEARCHSTRING against | |
| 335 | +** the text of the files listed. Output matches and snippets. | |
| 323 | 336 | */ |
| 324 | 337 | void test_match_cmd(void){ |
| 325 | 338 | Search *p; |
| 326 | 339 | int i; |
| 327 | 340 | Blob x; |
| @@ -351,15 +364,19 @@ | ||
| 351 | 364 | } |
| 352 | 365 | search_end(p); |
| 353 | 366 | } |
| 354 | 367 | |
| 355 | 368 | /* |
| 356 | -** An SQL function to initialize the global search pattern: | |
| 369 | +** An SQL function to initialize the full-scan search pattern: | |
| 357 | 370 | ** |
| 358 | 371 | ** search_init(PATTERN,BEGIN,END,GAP,FLAGS) |
| 359 | 372 | ** |
| 360 | -** All arguments are optional. | |
| 373 | +** All arguments are optional. PATTERN is the search pattern. If it | |
| 374 | +** is omitted, then the global search pattern is reset. BEGIN and END | |
| 375 | +** and GAP are the strings used to construct snippets. FLAGS is an | |
| 376 | +** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC, | |
| 377 | +** SRCH_TKT, or SRCH_ALL bits to determine what is to be searched. | |
| 361 | 378 | */ |
| 362 | 379 | static void search_init_sqlfunc( |
| 363 | 380 | sqlite3_context *context, |
| 364 | 381 | int argc, |
| 365 | 382 | sqlite3_value **argv |
| @@ -386,13 +403,15 @@ | ||
| 386 | 403 | }else{ |
| 387 | 404 | search_end(&gSearch); |
| 388 | 405 | } |
| 389 | 406 | } |
| 390 | 407 | |
| 391 | -/* | |
| 392 | -** Try to match the input text against the search parameters set up | |
| 393 | -** by the previous search_init() call. Remember the results globally. | |
| 408 | +/* search_match(TEXT, TEXT, ....) | |
| 409 | +** | |
| 410 | +** Using the full-scan search engine created by the most recent call | |
| 411 | +** to search_init(), match the input TEXT arguments. | |
| 412 | +** Remember the results in the global full-scan search object. | |
| 394 | 413 | ** Return non-zero on a match and zero on a miss. |
| 395 | 414 | */ |
| 396 | 415 | static void search_match_sqlfunc( |
| 397 | 416 | sqlite3_context *context, |
| 398 | 417 | int argc, |
| @@ -407,21 +426,27 @@ | ||
| 407 | 426 | } |
| 408 | 427 | rc = search_match(&gSearch, nDoc, azDoc); |
| 409 | 428 | sqlite3_result_int(context, rc); |
| 410 | 429 | } |
| 411 | 430 | |
| 412 | -/* | |
| 413 | -** These SQL functions return the results of the last | |
| 414 | -** call to the search_match() SQL function. | |
| 431 | + | |
| 432 | +/* search_score() | |
| 433 | +** | |
| 434 | +** Return the match score for the last successful search_match call. | |
| 415 | 435 | */ |
| 416 | 436 | static void search_score_sqlfunc( |
| 417 | 437 | sqlite3_context *context, |
| 418 | 438 | int argc, |
| 419 | 439 | sqlite3_value **argv |
| 420 | 440 | ){ |
| 421 | 441 | sqlite3_result_int(context, gSearch.iScore); |
| 422 | 442 | } |
| 443 | + | |
| 444 | +/* search_snippet() | |
| 445 | +** | |
| 446 | +** Return a snippet for the last successful search_match() call. | |
| 447 | +*/ | |
| 423 | 448 | static void search_snippet_sqlfunc( |
| 424 | 449 | sqlite3_context *context, |
| 425 | 450 | int argc, |
| 426 | 451 | sqlite3_value **argv |
| 427 | 452 | ){ |
| @@ -429,11 +454,12 @@ | ||
| 429 | 454 | sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free); |
| 430 | 455 | blob_init(&gSearch.snip, 0, 0); |
| 431 | 456 | } |
| 432 | 457 | } |
| 433 | 458 | |
| 434 | -/* | |
| 459 | +/* stext(TYPE, RID, ARG) | |
| 460 | +** | |
| 435 | 461 | ** This is an SQLite function that computes the searchable text. |
| 436 | 462 | ** It is a wrapper around the search_stext() routine. See the |
| 437 | 463 | ** search_stext() routine for further detail. |
| 438 | 464 | */ |
| 439 | 465 | static void search_stext_sqlfunc( |
| @@ -445,10 +471,15 @@ | ||
| 445 | 471 | int rid = sqlite3_value_int(argv[1]); |
| 446 | 472 | const char *zName = (const char*)sqlite3_value_text(argv[2]); |
| 447 | 473 | sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1, |
| 448 | 474 | SQLITE_TRANSIENT); |
| 449 | 475 | } |
| 476 | + | |
| 477 | +/* title(TYPE, RID, ARG) | |
| 478 | +** | |
| 479 | +** Return the title of the document to be searched. | |
| 480 | +*/ | |
| 450 | 481 | static void search_title_sqlfunc( |
| 451 | 482 | sqlite3_context *context, |
| 452 | 483 | int argc, |
| 453 | 484 | sqlite3_value **argv |
| 454 | 485 | ){ |
| @@ -461,10 +492,15 @@ | ||
| 461 | 492 | sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT); |
| 462 | 493 | }else{ |
| 463 | 494 | sqlite3_result_value(context, argv[2]); |
| 464 | 495 | } |
| 465 | 496 | } |
| 497 | + | |
| 498 | +/* body(TYPE, RID, ARG) | |
| 499 | +** | |
| 500 | +** Return the body of the document to be searched. | |
| 501 | +*/ | |
| 466 | 502 | static void search_body_sqlfunc( |
| 467 | 503 | sqlite3_context *context, |
| 468 | 504 | int argc, |
| 469 | 505 | sqlite3_value **argv |
| 470 | 506 | ){ |
| @@ -474,12 +510,14 @@ | ||
| 474 | 510 | int nHdr = 0; |
| 475 | 511 | char *z = search_stext_cached(zType[0], rid, zName, &nHdr); |
| 476 | 512 | sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT); |
| 477 | 513 | } |
| 478 | 514 | |
| 479 | -/* | |
| 480 | -** Encode a string for use as a query parameter in a URL | |
| 515 | +/* urlencode(X) | |
| 516 | +** | |
| 517 | +** Encode a string for use as a query parameter in a URL. This is | |
| 518 | +** the equivalent of printf("%T",X). | |
| 481 | 519 | */ |
| 482 | 520 | static void search_urlencode_sqlfunc( |
| 483 | 521 | sqlite3_context *context, |
| 484 | 522 | int argc, |
| 485 | 523 | sqlite3_value **argv |
| @@ -487,13 +525,12 @@ | ||
| 487 | 525 | char *z = mprintf("%T",sqlite3_value_text(argv[0])); |
| 488 | 526 | sqlite3_result_text(context, z, -1, fossil_free); |
| 489 | 527 | } |
| 490 | 528 | |
| 491 | 529 | /* |
| 492 | -** Register the "score()" SQL function to score its input text | |
| 493 | -** using the given Search object. Once this function is registered, | |
| 494 | -** do not delete the Search object. | |
| 530 | +** Register the various SQL functions (defined above) needed to implement | |
| 531 | +** full-scan search. | |
| 495 | 532 | */ |
| 496 | 533 | void search_sql_setup(sqlite3 *db){ |
| 497 | 534 | static int once = 0; |
| 498 | 535 | if( once++ ) return; |
| 499 | 536 | sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0, |
| @@ -636,16 +673,22 @@ | ||
| 636 | 673 | } |
| 637 | 674 | |
| 638 | 675 | /* |
| 639 | 676 | ** When this routine is called, there already exists a table |
| 640 | 677 | ** |
| 641 | -** x(label,url,score,date,snip). | |
| 678 | +** x(label,url,score,id,snip). | |
| 679 | +** | |
| 680 | +** label: The "name" of the document containing the match | |
| 681 | +** url: A URL for the document | |
| 682 | +** score: How well the document matched | |
| 683 | +** id: The document id. Format: xNNNNN, x: type, N: number | |
| 684 | +** snip: A snippet for the match | |
| 642 | 685 | ** |
| 643 | 686 | ** And the srchFlags parameter has been validated. This routine |
| 644 | -** fills the X table with search results using a full-text scan. | |
| 687 | +** fills the X table with search results using a full-scan search. | |
| 645 | 688 | ** |
| 646 | -** The companion indexed scan routine is search_indexed(). | |
| 689 | +** The companion indexed search routine is search_indexed(). | |
| 647 | 690 | */ |
| 648 | 691 | static void search_fullscan( |
| 649 | 692 | const char *zPattern, /* The query pattern */ |
| 650 | 693 | unsigned int srchFlags /* What to search over */ |
| 651 | 694 | ){ |
| @@ -805,16 +848,22 @@ | ||
| 805 | 848 | } |
| 806 | 849 | |
| 807 | 850 | /* |
| 808 | 851 | ** When this routine is called, there already exists a table |
| 809 | 852 | ** |
| 810 | -** x(label,url,score,date,snip). | |
| 853 | +** x(label,url,score,id,snip). | |
| 854 | +** | |
| 855 | +** label: The "name" of the document containing the match | |
| 856 | +** url: A URL for the document | |
| 857 | +** score: How well the document matched | |
| 858 | +** id: The document id. Format: xNNNNN, x: type, N: number | |
| 859 | +** snip: A snippet for the match | |
| 811 | 860 | ** |
| 812 | 861 | ** And the srchFlags parameter has been validated. This routine |
| 813 | -** fills the X table with search results using a index scan. | |
| 862 | +** fills the X table with search results using FTS indexed search. | |
| 814 | 863 | ** |
| 815 | -** The companion full-text scan routine is search_fullscan(). | |
| 864 | +** The companion full-scan search routine is search_fullscan(). | |
| 816 | 865 | */ |
| 817 | 866 | static void search_indexed( |
| 818 | 867 | const char *zPattern, /* The query pattern */ |
| 819 | 868 | unsigned int srchFlags /* What to search over */ |
| 820 | 869 | ){ |
| @@ -910,10 +959,14 @@ | ||
| 910 | 959 | |
| 911 | 960 | /* |
| 912 | 961 | ** This routine generates web-page output for a search operation. |
| 913 | 962 | ** Other web-pages can invoke this routine to add search results |
| 914 | 963 | ** in the middle of the page. |
| 964 | +** | |
| 965 | +** This routine works for both full-scan and indexed search. The | |
| 966 | +** appropriate low-level search routine is called according to the | |
| 967 | +** current configuration. | |
| 915 | 968 | ** |
| 916 | 969 | ** Return the number of rows. |
| 917 | 970 | */ |
| 918 | 971 | int search_run_and_output( |
| 919 | 972 | const char *zPattern, /* The query pattern */ |
| @@ -930,14 +983,14 @@ | ||
| 930 | 983 | add_content_sql_commands(g.db); |
| 931 | 984 | db_multi_exec( |
| 932 | 985 | "CREATE TEMP TABLE x(label,url,score,id,date,snip);" |
| 933 | 986 | ); |
| 934 | 987 | if( !search_index_exists() ){ |
| 935 | - search_fullscan(zPattern, srchFlags); | |
| 988 | + search_fullscan(zPattern, srchFlags); /* Full-scan search */ | |
| 936 | 989 | }else{ |
| 937 | - search_update_index(srchFlags); | |
| 938 | - search_indexed(zPattern, srchFlags); | |
| 990 | + search_update_index(srchFlags); /* Update the index, if necessary */ | |
| 991 | + search_indexed(zPattern, srchFlags); /* Indexed search */ | |
| 939 | 992 | } |
| 940 | 993 | db_prepare(&q, "SELECT url, snip, label, score, id" |
| 941 | 994 | " FROM x" |
| 942 | 995 | " ORDER BY score DESC, date DESC;"); |
| 943 | 996 | while( db_step(&q)==SQLITE_ROW ){ |
| @@ -1185,11 +1238,14 @@ | ||
| 1185 | 1238 | ** t Ticket text |
| 1186 | 1239 | ** |
| 1187 | 1240 | ** rid The RID of an artifact that defines the object |
| 1188 | 1241 | ** being searched. |
| 1189 | 1242 | ** |
| 1190 | -** zName Name of the object being searched. | |
| 1243 | +** zName Name of the object being searched. This is used | |
| 1244 | +** only to help figure out the mimetype (text/plain, | |
| 1245 | +** text/html, text/x-fossil-wiki, or text/x-markdown) | |
| 1246 | +** so that the code can know how to simplify the text. | |
| 1191 | 1247 | */ |
| 1192 | 1248 | void search_stext( |
| 1193 | 1249 | char cType, /* Type of document */ |
| 1194 | 1250 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1195 | 1251 | const char *zName, /* Auxiliary information */ |
| @@ -1288,11 +1344,11 @@ | ||
| 1288 | 1344 | ** with an eType of 0 to clear the cache. |
| 1289 | 1345 | */ |
| 1290 | 1346 | char *search_stext_cached( |
| 1291 | 1347 | char cType, /* Type of document */ |
| 1292 | 1348 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1293 | - const char *zName, /* Auxiliary information */ | |
| 1349 | + const char *zName, /* Auxiliary information, for mimetype */ | |
| 1294 | 1350 | int *pnTitle /* OUT: length of title in bytes excluding \n */ |
| 1295 | 1351 | ){ |
| 1296 | 1352 | static struct { |
| 1297 | 1353 | Blob stext; /* Cached search text */ |
| 1298 | 1354 | char cType; /* The type */ |
| @@ -1320,11 +1376,16 @@ | ||
| 1320 | 1376 | } |
| 1321 | 1377 | |
| 1322 | 1378 | /* |
| 1323 | 1379 | ** COMMAND: test-search-stext |
| 1324 | 1380 | ** |
| 1325 | -** Usage: fossil test-search-stext TYPE ARG1 ARG2 | |
| 1381 | +** Usage: fossil test-search-stext TYPE RID NAME | |
| 1382 | +** | |
| 1383 | +** Compute the search text for document TYPE-RID whose name is NAME. | |
| 1384 | +** The TYPE is one of "c", "d", "t", or "w". The RID is the document | |
| 1385 | +** ID. The NAME is used to figure out a mimetype to use for formatting | |
| 1386 | +** the raw document text. | |
| 1326 | 1387 | */ |
| 1327 | 1388 | void test_search_stext(void){ |
| 1328 | 1389 | Blob out; |
| 1329 | 1390 | db_find_and_open_repository(0,0); |
| 1330 | 1391 | if( g.argc!=5 ) usage("TYPE RID NAME"); |
| @@ -1742,5 +1803,96 @@ | ||
| 1742 | 1803 | }else{ |
| 1743 | 1804 | fossil_print("%-16s disabled\n", "full-text index:"); |
| 1744 | 1805 | } |
| 1745 | 1806 | db_end_transaction(0); |
| 1746 | 1807 | } |
| 1808 | + | |
| 1809 | +/* | |
| 1810 | +** WEBPAGE: test-ftsdocs | |
| 1811 | +** | |
| 1812 | +** Show a table of all documents currently in the search index. | |
| 1813 | +*/ | |
| 1814 | +void search_data_page(void){ | |
| 1815 | + Stmt q; | |
| 1816 | + const char *zId = P("id"); | |
| 1817 | + const char *zType = P("y"); | |
| 1818 | + const char *zIdxed = P("ixed"); | |
| 1819 | + int id; | |
| 1820 | + int cnt = 0; | |
| 1821 | + login_check_credentials(); | |
| 1822 | + if( !g.perm.Admin ){ login_needed(0); return; } | |
| 1823 | + if( !search_index_exists() ){ | |
| 1824 | + @ <p>Indexed search is disabled | |
| 1825 | + style_footer(); | |
| 1826 | + return; | |
| 1827 | + } | |
| 1828 | + if( zId!=0 && (id = atoi(zId))>0 ){ | |
| 1829 | + /* Show information about a single ftsdocs entry */ | |
| 1830 | + style_header("Information about ftsdoc entry %d", id); | |
| 1831 | + db_prepare(&q, | |
| 1832 | + "SELECT type||rid, name, idxed, label, url, datetime(mtime)" | |
| 1833 | + " FROM ftsdocs WHERE rowid=%d", id | |
| 1834 | + ); | |
| 1835 | + if( db_step(&q)==SQLITE_ROW ){ | |
| 1836 | + const char *zUrl = db_column_text(&q,4); | |
| 1837 | + @ <table border=0> | |
| 1838 | + @ <tr><td align='right'>rowid:<td> <td>%d(id) | |
| 1839 | + @ <tr><td align='right'>id:<td><td>%s(db_column_text(&q,0)) | |
| 1840 | + @ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1)) | |
| 1841 | + @ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2)) | |
| 1842 | + @ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3)) | |
| 1843 | + @ <tr><td align='right'>url:<td><td> | |
| 1844 | + @ <a href='%R%s(zUrl)'>%h(zUrl)</a> | |
| 1845 | + @ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5)) | |
| 1846 | + @ </table> | |
| 1847 | + } | |
| 1848 | + db_finalize(&q); | |
| 1849 | + style_footer(); | |
| 1850 | + return; | |
| 1851 | + } | |
| 1852 | + if( zType!=0 && zType[0]!=0 && zType[1]==0 && | |
| 1853 | + zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0 | |
| 1854 | + ){ | |
| 1855 | + int ixed = zIdxed[0]=='1'; | |
| 1856 | + style_header("List of '%c' documents that are%s indexed", | |
| 1857 | + zType[0], ixed ? "" : " not"); | |
| 1858 | + db_prepare(&q, | |
| 1859 | + "SELECT rowid, type||rid ||' '|| coalesce(label,'')" | |
| 1860 | + " FROM ftsdocs WHERE type='%c' AND %s idxed", | |
| 1861 | + zType[0], ixed ? "" : "NOT" | |
| 1862 | + ); | |
| 1863 | + @ <ul> | |
| 1864 | + while( db_step(&q)==SQLITE_ROW ){ | |
| 1865 | + @ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'> | |
| 1866 | + @ %h(db_column_text(&q,1))</a> | |
| 1867 | + } | |
| 1868 | + @ </ul> | |
| 1869 | + db_finalize(&q); | |
| 1870 | + style_footer(); | |
| 1871 | + return; | |
| 1872 | + } | |
| 1873 | + style_header("Summary of ftsdocs"); | |
| 1874 | + db_prepare(&q, | |
| 1875 | + "SELECT type, idxed, count(*) FROM ftsdocs" | |
| 1876 | + " GROUP BY 1, 2 ORDER BY 3 DESC" | |
| 1877 | + ); | |
| 1878 | + @ <table border=1 cellpadding=3 cellspacing=0> | |
| 1879 | + @ <thead> | |
| 1880 | + @ <tr><th>Type<th>Indexed?<th>Count<th>Link | |
| 1881 | + @ </thead> | |
| 1882 | + @ <tbody> | |
| 1883 | + while( db_step(&q)==SQLITE_ROW ){ | |
| 1884 | + const char *zType = db_column_text(&q,0); | |
| 1885 | + int idxed = db_column_int(&q,1); | |
| 1886 | + int n = db_column_int(&q,2); | |
| 1887 | + @ <tr><td>%h(zType)<td>%d(idxed) | |
| 1888 | + @ <td>%d(n) | |
| 1889 | + @ <td><a href='test-ftsdocs?y=%s(zType)&ixed=%d(idxed)'>listing</a> | |
| 1890 | + @ </tr> | |
| 1891 | + cnt += n; | |
| 1892 | + } | |
| 1893 | + @ </tbody><tfooter> | |
| 1894 | + @ <tr><th>Total<th><th>%d(cnt)<th> | |
| 1895 | + @ </tfooter> | |
| 1896 | + @ </table> | |
| 1897 | + style_footer(); | |
| 1898 | +} | |
| 1747 | 1899 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -13,29 +13,38 @@ | |
| 13 | ** [email protected] |
| 14 | ** http://www.hwaci.com/drh/ |
| 15 | ** |
| 16 | ******************************************************************************* |
| 17 | ** |
| 18 | ** This file contains code to implement a very simple search function |
| 19 | ** against timeline comments, check-in content, wiki pages, and/or tickets. |
| 20 | ** |
| 21 | ** The search is full-text like in that it is looking for words and ignores |
| 22 | ** punctuation and capitalization. But it is more akin to "grep" in that |
| 23 | ** it scans the entire corpus for the search, and it does not support the |
| 24 | ** full functionality of FTS4. |
| 25 | */ |
| 26 | #include "config.h" |
| 27 | #include "search.h" |
| 28 | #include <assert.h> |
| 29 | |
| 30 | #if INTERFACE |
| 31 | |
| 32 | /* Maximum number of search terms */ |
| 33 | #define SEARCH_MAX_TERM 8 |
| 34 | |
| 35 | /* |
| 36 | ** A compiled search pattern |
| 37 | */ |
| 38 | struct Search { |
| 39 | int nTerm; /* Number of search terms */ |
| 40 | struct srchTerm { /* For each search term */ |
| 41 | char *z; /* Text */ |
| @@ -85,11 +94,11 @@ | |
| 85 | }; |
| 86 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 87 | |
| 88 | |
| 89 | /* |
| 90 | ** Destroy a search context. |
| 91 | */ |
| 92 | void search_end(Search *p){ |
| 93 | if( p ){ |
| 94 | fossil_free(p->zPattern); |
| 95 | fossil_free(p->zMarkBegin); |
| @@ -100,11 +109,11 @@ | |
| 100 | if( p!=&gSearch ) fossil_free(p); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | /* |
| 105 | ** Compile a search pattern |
| 106 | */ |
| 107 | static Search *search_init( |
| 108 | const char *zPattern, /* The search pattern */ |
| 109 | const char *zMarkBegin, /* Start of a match */ |
| 110 | const char *zMarkEnd, /* End of a match */ |
| @@ -157,11 +166,12 @@ | |
| 157 | blob_append(pSnip, zTxt, n); |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | /* |
| 163 | ** Compare a search pattern against one or more input strings which |
| 164 | ** collectively comprise a document. Return a match score. Any |
| 165 | ** positive value means there was a match. Zero means that one or |
| 166 | ** more terms are missing. |
| 167 | ** |
| @@ -318,10 +328,13 @@ | |
| 318 | |
| 319 | /* |
| 320 | ** COMMAND: test-match |
| 321 | ** |
| 322 | ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ... |
| 323 | */ |
| 324 | void test_match_cmd(void){ |
| 325 | Search *p; |
| 326 | int i; |
| 327 | Blob x; |
| @@ -351,15 +364,19 @@ | |
| 351 | } |
| 352 | search_end(p); |
| 353 | } |
| 354 | |
| 355 | /* |
| 356 | ** An SQL function to initialize the global search pattern: |
| 357 | ** |
| 358 | ** search_init(PATTERN,BEGIN,END,GAP,FLAGS) |
| 359 | ** |
| 360 | ** All arguments are optional. |
| 361 | */ |
| 362 | static void search_init_sqlfunc( |
| 363 | sqlite3_context *context, |
| 364 | int argc, |
| 365 | sqlite3_value **argv |
| @@ -386,13 +403,15 @@ | |
| 386 | }else{ |
| 387 | search_end(&gSearch); |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | /* |
| 392 | ** Try to match the input text against the search parameters set up |
| 393 | ** by the previous search_init() call. Remember the results globally. |
| 394 | ** Return non-zero on a match and zero on a miss. |
| 395 | */ |
| 396 | static void search_match_sqlfunc( |
| 397 | sqlite3_context *context, |
| 398 | int argc, |
| @@ -407,21 +426,27 @@ | |
| 407 | } |
| 408 | rc = search_match(&gSearch, nDoc, azDoc); |
| 409 | sqlite3_result_int(context, rc); |
| 410 | } |
| 411 | |
| 412 | /* |
| 413 | ** These SQL functions return the results of the last |
| 414 | ** call to the search_match() SQL function. |
| 415 | */ |
| 416 | static void search_score_sqlfunc( |
| 417 | sqlite3_context *context, |
| 418 | int argc, |
| 419 | sqlite3_value **argv |
| 420 | ){ |
| 421 | sqlite3_result_int(context, gSearch.iScore); |
| 422 | } |
| 423 | static void search_snippet_sqlfunc( |
| 424 | sqlite3_context *context, |
| 425 | int argc, |
| 426 | sqlite3_value **argv |
| 427 | ){ |
| @@ -429,11 +454,12 @@ | |
| 429 | sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free); |
| 430 | blob_init(&gSearch.snip, 0, 0); |
| 431 | } |
| 432 | } |
| 433 | |
| 434 | /* |
| 435 | ** This is an SQLite function that computes the searchable text. |
| 436 | ** It is a wrapper around the search_stext() routine. See the |
| 437 | ** search_stext() routine for further detail. |
| 438 | */ |
| 439 | static void search_stext_sqlfunc( |
| @@ -445,10 +471,15 @@ | |
| 445 | int rid = sqlite3_value_int(argv[1]); |
| 446 | const char *zName = (const char*)sqlite3_value_text(argv[2]); |
| 447 | sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1, |
| 448 | SQLITE_TRANSIENT); |
| 449 | } |
| 450 | static void search_title_sqlfunc( |
| 451 | sqlite3_context *context, |
| 452 | int argc, |
| 453 | sqlite3_value **argv |
| 454 | ){ |
| @@ -461,10 +492,15 @@ | |
| 461 | sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT); |
| 462 | }else{ |
| 463 | sqlite3_result_value(context, argv[2]); |
| 464 | } |
| 465 | } |
| 466 | static void search_body_sqlfunc( |
| 467 | sqlite3_context *context, |
| 468 | int argc, |
| 469 | sqlite3_value **argv |
| 470 | ){ |
| @@ -474,12 +510,14 @@ | |
| 474 | int nHdr = 0; |
| 475 | char *z = search_stext_cached(zType[0], rid, zName, &nHdr); |
| 476 | sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT); |
| 477 | } |
| 478 | |
| 479 | /* |
| 480 | ** Encode a string for use as a query parameter in a URL |
| 481 | */ |
| 482 | static void search_urlencode_sqlfunc( |
| 483 | sqlite3_context *context, |
| 484 | int argc, |
| 485 | sqlite3_value **argv |
| @@ -487,13 +525,12 @@ | |
| 487 | char *z = mprintf("%T",sqlite3_value_text(argv[0])); |
| 488 | sqlite3_result_text(context, z, -1, fossil_free); |
| 489 | } |
| 490 | |
| 491 | /* |
| 492 | ** Register the "score()" SQL function to score its input text |
| 493 | ** using the given Search object. Once this function is registered, |
| 494 | ** do not delete the Search object. |
| 495 | */ |
| 496 | void search_sql_setup(sqlite3 *db){ |
| 497 | static int once = 0; |
| 498 | if( once++ ) return; |
| 499 | sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0, |
| @@ -636,16 +673,22 @@ | |
| 636 | } |
| 637 | |
| 638 | /* |
| 639 | ** When this routine is called, there already exists a table |
| 640 | ** |
| 641 | ** x(label,url,score,date,snip). |
| 642 | ** |
| 643 | ** And the srchFlags parameter has been validated. This routine |
| 644 | ** fills the X table with search results using a full-text scan. |
| 645 | ** |
| 646 | ** The companion indexed scan routine is search_indexed(). |
| 647 | */ |
| 648 | static void search_fullscan( |
| 649 | const char *zPattern, /* The query pattern */ |
| 650 | unsigned int srchFlags /* What to search over */ |
| 651 | ){ |
| @@ -805,16 +848,22 @@ | |
| 805 | } |
| 806 | |
| 807 | /* |
| 808 | ** When this routine is called, there already exists a table |
| 809 | ** |
| 810 | ** x(label,url,score,date,snip). |
| 811 | ** |
| 812 | ** And the srchFlags parameter has been validated. This routine |
| 813 | ** fills the X table with search results using a index scan. |
| 814 | ** |
| 815 | ** The companion full-text scan routine is search_fullscan(). |
| 816 | */ |
| 817 | static void search_indexed( |
| 818 | const char *zPattern, /* The query pattern */ |
| 819 | unsigned int srchFlags /* What to search over */ |
| 820 | ){ |
| @@ -910,10 +959,14 @@ | |
| 910 | |
| 911 | /* |
| 912 | ** This routine generates web-page output for a search operation. |
| 913 | ** Other web-pages can invoke this routine to add search results |
| 914 | ** in the middle of the page. |
| 915 | ** |
| 916 | ** Return the number of rows. |
| 917 | */ |
| 918 | int search_run_and_output( |
| 919 | const char *zPattern, /* The query pattern */ |
| @@ -930,14 +983,14 @@ | |
| 930 | add_content_sql_commands(g.db); |
| 931 | db_multi_exec( |
| 932 | "CREATE TEMP TABLE x(label,url,score,id,date,snip);" |
| 933 | ); |
| 934 | if( !search_index_exists() ){ |
| 935 | search_fullscan(zPattern, srchFlags); |
| 936 | }else{ |
| 937 | search_update_index(srchFlags); |
| 938 | search_indexed(zPattern, srchFlags); |
| 939 | } |
| 940 | db_prepare(&q, "SELECT url, snip, label, score, id" |
| 941 | " FROM x" |
| 942 | " ORDER BY score DESC, date DESC;"); |
| 943 | while( db_step(&q)==SQLITE_ROW ){ |
| @@ -1185,11 +1238,14 @@ | |
| 1185 | ** t Ticket text |
| 1186 | ** |
| 1187 | ** rid The RID of an artifact that defines the object |
| 1188 | ** being searched. |
| 1189 | ** |
| 1190 | ** zName Name of the object being searched. |
| 1191 | */ |
| 1192 | void search_stext( |
| 1193 | char cType, /* Type of document */ |
| 1194 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1195 | const char *zName, /* Auxiliary information */ |
| @@ -1288,11 +1344,11 @@ | |
| 1288 | ** with an eType of 0 to clear the cache. |
| 1289 | */ |
| 1290 | char *search_stext_cached( |
| 1291 | char cType, /* Type of document */ |
| 1292 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1293 | const char *zName, /* Auxiliary information */ |
| 1294 | int *pnTitle /* OUT: length of title in bytes excluding \n */ |
| 1295 | ){ |
| 1296 | static struct { |
| 1297 | Blob stext; /* Cached search text */ |
| 1298 | char cType; /* The type */ |
| @@ -1320,11 +1376,16 @@ | |
| 1320 | } |
| 1321 | |
| 1322 | /* |
| 1323 | ** COMMAND: test-search-stext |
| 1324 | ** |
| 1325 | ** Usage: fossil test-search-stext TYPE ARG1 ARG2 |
| 1326 | */ |
| 1327 | void test_search_stext(void){ |
| 1328 | Blob out; |
| 1329 | db_find_and_open_repository(0,0); |
| 1330 | if( g.argc!=5 ) usage("TYPE RID NAME"); |
| @@ -1742,5 +1803,96 @@ | |
| 1742 | }else{ |
| 1743 | fossil_print("%-16s disabled\n", "full-text index:"); |
| 1744 | } |
| 1745 | db_end_transaction(0); |
| 1746 | } |
| 1747 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -13,29 +13,38 @@ | |
| 13 | ** [email protected] |
| 14 | ** http://www.hwaci.com/drh/ |
| 15 | ** |
| 16 | ******************************************************************************* |
| 17 | ** |
| 18 | ** This file contains code to implement search functions |
| 19 | ** against timeline comments, check-in content, wiki pages, and/or tickets. |
| 20 | ** |
| 21 | ** The search can be either a per-query "grep"-like search that scans |
| 22 | ** the entire corpus. Or it can use the FTS4 or FTS5 search engine of |
| 23 | ** SQLite. The choice is an administrator configuration option. |
| 24 | ** |
| 25 | ** The first option is referred to as "full-scan search". The second |
| 26 | ** option is called "indexed search". |
| 27 | ** |
| 28 | ** The code in this file is ordered approximately as follows: |
| 29 | ** |
| 30 | ** (1) The full-scan search engine |
| 31 | ** (2) The indexed search engine |
| 32 | ** (3) Higher level interfaces that use either (1) or (2) according |
| 33 | ** to the current search configuration settings |
| 34 | */ |
| 35 | #include "config.h" |
| 36 | #include "search.h" |
| 37 | #include <assert.h> |
| 38 | |
| 39 | #if INTERFACE |
| 40 | |
| 41 | /* Maximum number of search terms for full-scan search */ |
| 42 | #define SEARCH_MAX_TERM 8 |
| 43 | |
| 44 | /* |
| 45 | ** A compiled search pattern used for full-scan search. |
| 46 | */ |
| 47 | struct Search { |
| 48 | int nTerm; /* Number of search terms */ |
| 49 | struct srchTerm { /* For each search term */ |
| 50 | char *z; /* Text */ |
| @@ -85,11 +94,11 @@ | |
| 94 | }; |
| 95 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 96 | |
| 97 | |
| 98 | /* |
| 99 | ** Destroy a full-scan search context. |
| 100 | */ |
| 101 | void search_end(Search *p){ |
| 102 | if( p ){ |
| 103 | fossil_free(p->zPattern); |
| 104 | fossil_free(p->zMarkBegin); |
| @@ -100,11 +109,11 @@ | |
| 109 | if( p!=&gSearch ) fossil_free(p); |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | /* |
| 114 | ** Compile a full-scan search pattern |
| 115 | */ |
| 116 | static Search *search_init( |
| 117 | const char *zPattern, /* The search pattern */ |
| 118 | const char *zMarkBegin, /* Start of a match */ |
| 119 | const char *zMarkEnd, /* End of a match */ |
| @@ -157,11 +166,12 @@ | |
| 166 | blob_append(pSnip, zTxt, n); |
| 167 | } |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | /* This is the core search engine for full-scan search. |
| 172 | ** |
| 173 | ** Compare a search pattern against one or more input strings which |
| 174 | ** collectively comprise a document. Return a match score. Any |
| 175 | ** positive value means there was a match. Zero means that one or |
| 176 | ** more terms are missing. |
| 177 | ** |
| @@ -318,10 +328,13 @@ | |
| 328 | |
| 329 | /* |
| 330 | ** COMMAND: test-match |
| 331 | ** |
| 332 | ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ... |
| 333 | ** |
| 334 | ** Run the full-scan search algorithm using SEARCHSTRING against |
| 335 | ** the text of the files listed. Output matches and snippets. |
| 336 | */ |
| 337 | void test_match_cmd(void){ |
| 338 | Search *p; |
| 339 | int i; |
| 340 | Blob x; |
| @@ -351,15 +364,19 @@ | |
| 364 | } |
| 365 | search_end(p); |
| 366 | } |
| 367 | |
| 368 | /* |
| 369 | ** An SQL function to initialize the full-scan search pattern: |
| 370 | ** |
| 371 | ** search_init(PATTERN,BEGIN,END,GAP,FLAGS) |
| 372 | ** |
| 373 | ** All arguments are optional. PATTERN is the search pattern. If it |
| 374 | ** is omitted, then the global search pattern is reset. BEGIN and END |
| 375 | ** and GAP are the strings used to construct snippets. FLAGS is an |
| 376 | ** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC, |
| 377 | ** SRCH_TKT, or SRCH_ALL bits to determine what is to be searched. |
| 378 | */ |
| 379 | static void search_init_sqlfunc( |
| 380 | sqlite3_context *context, |
| 381 | int argc, |
| 382 | sqlite3_value **argv |
| @@ -386,13 +403,15 @@ | |
| 403 | }else{ |
| 404 | search_end(&gSearch); |
| 405 | } |
| 406 | } |
| 407 | |
| 408 | /* search_match(TEXT, TEXT, ....) |
| 409 | ** |
| 410 | ** Using the full-scan search engine created by the most recent call |
| 411 | ** to search_init(), match the input TEXT arguments. |
| 412 | ** Remember the results in the global full-scan search object. |
| 413 | ** Return non-zero on a match and zero on a miss. |
| 414 | */ |
| 415 | static void search_match_sqlfunc( |
| 416 | sqlite3_context *context, |
| 417 | int argc, |
| @@ -407,21 +426,27 @@ | |
| 426 | } |
| 427 | rc = search_match(&gSearch, nDoc, azDoc); |
| 428 | sqlite3_result_int(context, rc); |
| 429 | } |
| 430 | |
| 431 | |
| 432 | /* search_score() |
| 433 | ** |
| 434 | ** Return the match score for the last successful search_match call. |
| 435 | */ |
| 436 | static void search_score_sqlfunc( |
| 437 | sqlite3_context *context, int argc, sqlite3_value **argv |
| 438 | ){ |
| 439 | /* Report the score stored in the global full-scan search object */ |
| 440 | const int iLastScore = gSearch.iScore; |
| 441 | sqlite3_result_int(context, iLastScore); |
| 442 | } |
| 443 | |
| 444 | /* search_snippet() |
| 445 | ** |
| 446 | ** Return a snippet for the last successful search_match() call. |
| 447 | */ |
| 448 | static void search_snippet_sqlfunc( |
| 449 | sqlite3_context *context, |
| 450 | int argc, |
| 451 | sqlite3_value **argv |
| 452 | ){ |
| @@ -429,11 +454,12 @@ | |
| 454 | sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free); |
| 455 | blob_init(&gSearch.snip, 0, 0); |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | /* stext(TYPE, RID, ARG) |
| 460 | ** |
| 461 | ** This is an SQLite function that computes the searchable text. |
| 462 | ** It is a wrapper around the search_stext() routine. See the |
| 463 | ** search_stext() routine for further detail. |
| 464 | */ |
| 465 | static void search_stext_sqlfunc( |
| @@ -445,10 +471,15 @@ | |
| 471 | int rid = sqlite3_value_int(argv[1]); |
| 472 | const char *zName = (const char*)sqlite3_value_text(argv[2]); |
| 473 | sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1, |
| 474 | SQLITE_TRANSIENT); |
| 475 | } |
| 476 | |
| 477 | /* title(TYPE, RID, ARG) |
| 478 | ** |
| 479 | ** Return the title of the document to be searched. |
| 480 | */ |
| 481 | static void search_title_sqlfunc( |
| 482 | sqlite3_context *context, |
| 483 | int argc, |
| 484 | sqlite3_value **argv |
| 485 | ){ |
| @@ -461,10 +492,15 @@ | |
| 492 | sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT); |
| 493 | }else{ |
| 494 | sqlite3_result_value(context, argv[2]); |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | /* body(TYPE, RID, ARG) |
| 499 | ** |
| 500 | ** Return the body of the document to be searched. |
| 501 | */ |
| 502 | static void search_body_sqlfunc( |
| 503 | sqlite3_context *context, |
| 504 | int argc, |
| 505 | sqlite3_value **argv |
| 506 | ){ |
| @@ -474,12 +510,14 @@ | |
| 510 | int nHdr = 0; |
| 511 | char *z = search_stext_cached(zType[0], rid, zName, &nHdr); |
| 512 | sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT); |
| 513 | } |
| 514 | |
| 515 | /* urlencode(X) |
| 516 | ** |
| 517 | ** Encode a string for use as a query parameter in a URL. This is |
| 518 | ** the equivalent of printf("%T",X). |
| 519 | */ |
| 520 | static void search_urlencode_sqlfunc( |
| 521 | sqlite3_context *context, |
| 522 | int argc, |
| 523 | sqlite3_value **argv |
| @@ -487,13 +525,12 @@ | |
| 525 | char *z = mprintf("%T",sqlite3_value_text(argv[0])); |
| 526 | sqlite3_result_text(context, z, -1, fossil_free); |
| 527 | } |
| 528 | |
| 529 | /* |
| 530 | ** Register the various SQL functions (defined above) needed to implement |
| 531 | ** full-scan search. |
| 532 | */ |
| 533 | void search_sql_setup(sqlite3 *db){ |
| 534 | static int once = 0; |
| 535 | if( once++ ) return; |
| 536 | sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0, |
| @@ -636,16 +673,22 @@ | |
| 673 | } |
| 674 | |
| 675 | /* |
| 676 | ** When this routine is called, there already exists a table |
| 677 | ** |
| 678 | ** x(label,url,score,id,date,snip). |
| 679 | ** |
| 680 | ** label: The "name" of the document containing the match |
| 681 | ** url: A URL for the document |
| 682 | ** score: How well the document matched |
| 683 | ** id: The document id. Format: xNNNNN, x: type, N: number |
| 684 | ** snip: A snippet for the match |
| 685 | ** |
| 686 | ** And the srchFlags parameter has been validated. This routine |
| 687 | ** fills the X table with search results using a full-scan search. |
| 688 | ** |
| 689 | ** The companion indexed search routine is search_indexed(). |
| 690 | */ |
| 691 | static void search_fullscan( |
| 692 | const char *zPattern, /* The query pattern */ |
| 693 | unsigned int srchFlags /* What to search over */ |
| 694 | ){ |
| @@ -805,16 +848,22 @@ | |
| 848 | } |
| 849 | |
| 850 | /* |
| 851 | ** When this routine is called, there already exists a table |
| 852 | ** |
| 853 | ** x(label,url,score,id,date,snip). |
| 854 | ** |
| 855 | ** label: The "name" of the document containing the match |
| 856 | ** url: A URL for the document |
| 857 | ** score: How well the document matched |
| 858 | ** id: The document id. Format: xNNNNN, x: type, N: number |
| 859 | ** snip: A snippet for the match |
| 860 | ** |
| 861 | ** And the srchFlags parameter has been validated. This routine |
| 862 | ** fills the X table with search results using FTS indexed search. |
| 863 | ** |
| 864 | ** The companion full-scan search routine is search_fullscan(). |
| 865 | */ |
| 866 | static void search_indexed( |
| 867 | const char *zPattern, /* The query pattern */ |
| 868 | unsigned int srchFlags /* What to search over */ |
| 869 | ){ |
| @@ -910,10 +959,14 @@ | |
| 959 | |
| 960 | /* |
| 961 | ** This routine generates web-page output for a search operation. |
| 962 | ** Other web-pages can invoke this routine to add search results |
| 963 | ** in the middle of the page. |
| 964 | ** |
| 965 | ** This routine works for both full-scan and indexed search. The |
| 966 | ** appropriate low-level search routine is called according to the |
| 967 | ** current configuration. |
| 968 | ** |
| 969 | ** Return the number of rows. |
| 970 | */ |
| 971 | int search_run_and_output( |
| 972 | const char *zPattern, /* The query pattern */ |
| @@ -930,14 +983,14 @@ | |
| 983 | add_content_sql_commands(g.db); |
| 984 | db_multi_exec( |
| 985 | "CREATE TEMP TABLE x(label,url,score,id,date,snip);" |
| 986 | ); |
| 987 | if( !search_index_exists() ){ |
| 988 | search_fullscan(zPattern, srchFlags); /* Full-scan search */ |
| 989 | }else{ |
| 990 | search_update_index(srchFlags); /* Update the index, if necessary */ |
| 991 | search_indexed(zPattern, srchFlags); /* Indexed search */ |
| 992 | } |
| 993 | db_prepare(&q, "SELECT url, snip, label, score, id" |
| 994 | " FROM x" |
| 995 | " ORDER BY score DESC, date DESC;"); |
| 996 | while( db_step(&q)==SQLITE_ROW ){ |
| @@ -1185,11 +1238,14 @@ | |
| 1238 | ** t Ticket text |
| 1239 | ** |
| 1240 | ** rid The RID of an artifact that defines the object |
| 1241 | ** being searched. |
| 1242 | ** |
| 1243 | ** zName Name of the object being searched. This is used |
| 1244 | ** only to help figure out the mimetype (text/plain, |
| 1245 | ** text/html, text/x-fossil-wiki, or text/x-markdown) |
| 1246 | ** so that the code can know how to simplify the text. |
| 1247 | */ |
| 1248 | void search_stext( |
| 1249 | char cType, /* Type of document */ |
| 1250 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1251 | const char *zName, /* Auxiliary information */ |
| @@ -1288,11 +1344,11 @@ | |
| 1344 | ** with a cType of 0 to clear the cache. |
| 1345 | */ |
| 1346 | char *search_stext_cached( |
| 1347 | char cType, /* Type of document */ |
| 1348 | int rid, /* BLOB.RID or TAG.TAGID value for document */ |
| 1349 | const char *zName, /* Auxiliary information, for mimetype */ |
| 1350 | int *pnTitle /* OUT: length of title in bytes excluding \n */ |
| 1351 | ){ |
| 1352 | static struct { |
| 1353 | Blob stext; /* Cached search text */ |
| 1354 | char cType; /* The type */ |
| @@ -1320,11 +1376,16 @@ | |
| 1376 | } |
| 1377 | |
| 1378 | /* |
| 1379 | ** COMMAND: test-search-stext |
| 1380 | ** |
| 1381 | ** Usage: fossil test-search-stext TYPE RID NAME |
| 1382 | ** |
| 1383 | ** Compute the search text for document TYPE-RID whose name is NAME. |
| 1384 | ** The TYPE is one of "c", "d", "t", or "w". The RID is the document |
| 1385 | ** ID. The NAME is used to figure out a mimetype to use for formatting |
| 1386 | ** the raw document text. |
| 1387 | */ |
| 1388 | void test_search_stext(void){ |
| 1389 | Blob out; |
| 1390 | db_find_and_open_repository(0,0); |
| 1391 | if( g.argc!=5 ) usage("TYPE RID NAME"); |
| @@ -1742,5 +1803,96 @@ | |
| 1803 | }else{ |
| 1804 | fossil_print("%-16s disabled\n", "full-text index:"); |
| 1805 | } |
| 1806 | db_end_transaction(0); |
| 1807 | } |
| 1808 | |
| 1809 | /* |
| 1810 | ** WEBPAGE: test-ftsdocs |
| 1811 | ** |
| 1812 | ** Show information about the documents currently in the search index |
| 1813 | ** (the ftsdocs table). This page is accessible only to administrators. |
| 1814 | ** |
| 1815 | ** Query parameters: |
| 1816 | ** |
| 1817 | ** id=N Show the details of the ftsdocs entry whose rowid is N. |
| 1818 | ** y=X&ixed=B List the documents of single-character type X that |
| 1819 | ** are indexed (B is 1) or not indexed (B is 0). |
| 1820 | ** |
| 1821 | ** If neither form is supplied, show a summary of document counts |
| 1822 | ** grouped by type and by indexed status. |
| 1823 | */ |
| 1824 | void search_data_page(void){ |
| 1825 | Stmt q; |
| 1826 | const char *zId = P("id"); |
| 1827 | const char *zType = P("y"); |
| 1828 | const char *zIdxed = P("ixed"); |
| 1829 | int id; |
| 1830 | int cnt = 0; |
| 1831 | login_check_credentials(); |
| 1832 | if( !g.perm.Admin ){ login_needed(0); return; } |
| 1833 | if( !search_index_exists() ){ |
| 1834 | /* Start the page before writing content so the footer has a header */ |
| 1835 | style_header("Summary of ftsdocs"); |
| 1836 | @ <p>Indexed search is disabled |
| 1837 | style_footer(); |
| 1838 | return; |
| 1839 | } |
| 1840 | if( zId!=0 && (id = atoi(zId))>0 ){ |
| 1841 | /* Show information about a single ftsdocs entry */ |
| 1842 | style_header("Information about ftsdoc entry %d", id); |
| 1843 | db_prepare(&q, |
| 1844 | "SELECT type||rid, name, idxed, label, url, datetime(mtime)" |
| 1845 | " FROM ftsdocs WHERE rowid=%d", id |
| 1846 | ); |
| 1847 | if( db_step(&q)==SQLITE_ROW ){ |
| 1848 | const char *zUrl = db_column_text(&q,4); |
| 1849 | @ <table border=0> |
| 1850 | @ <tr><td align='right'>rowid:<td> <td>%d(id) |
| 1851 | @ <tr><td align='right'>id:<td><td>%s(db_column_text(&q,0)) |
| 1852 | @ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1)) |
| 1853 | @ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2)) |
| 1854 | @ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3)) |
| 1855 | @ <tr><td align='right'>url:<td><td> |
| 1856 | @ <a href='%R%h(zUrl)'>%h(zUrl)</a> |
| 1857 | @ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5)) |
| 1858 | @ </table> |
| 1859 | } |
| 1860 | db_finalize(&q); |
| 1861 | style_footer(); |
| 1862 | return; |
| 1863 | } |
| 1864 | if( zType!=0 && zType[0]!=0 && zType[1]==0 && |
| 1865 | zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0 |
| 1866 | ){ |
| 1867 | /* List the documents of one type with the requested indexed status */ |
| 1868 | int ixed = zIdxed[0]=='1'; |
| 1869 | style_header("List of '%c' documents that are%s indexed", |
| 1870 | zType[0], ixed ? "" : " not"); |
| 1871 | /* %Q renders zType as a quoted SQL string literal, so a type such as |
| 1872 | ** a lone single-quote character cannot corrupt the statement. */ |
| 1873 | db_prepare(&q, |
| 1874 | "SELECT rowid, type||rid ||' '|| coalesce(label,'')" |
| 1875 | " FROM ftsdocs WHERE type=%Q AND %s idxed", |
| 1876 | zType, ixed ? "" : "NOT" |
| 1877 | ); |
| 1878 | @ <ul> |
| 1879 | while( db_step(&q)==SQLITE_ROW ){ |
| 1880 | @ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'> |
| 1881 | @ %h(db_column_text(&q,1))</a> |
| 1882 | } |
| 1883 | @ </ul> |
| 1884 | db_finalize(&q); |
| 1885 | style_footer(); |
| 1886 | return; |
| 1887 | } |
| 1888 | /* Default: summarize the ftsdocs table by type and indexed status */ |
| 1889 | style_header("Summary of ftsdocs"); |
| 1890 | db_prepare(&q, |
| 1891 | "SELECT type, idxed, count(*) FROM ftsdocs" |
| 1892 | " GROUP BY 1, 2 ORDER BY 3 DESC" |
| 1893 | ); |
| 1894 | @ <table border=1 cellpadding=3 cellspacing=0> |
| 1895 | @ <thead> |
| 1896 | @ <tr><th>Type<th>Indexed?<th>Count<th>Link |
| 1897 | @ </thead> |
| 1898 | @ <tbody> |
| 1899 | while( db_step(&q)==SQLITE_ROW ){ |
| 1900 | const char *zDocType = db_column_text(&q,0); |
| 1901 | int idxed = db_column_int(&q,1); |
| 1902 | int n = db_column_int(&q,2); |
| 1903 | @ <tr><td>%h(zDocType)<td>%d(idxed) |
| 1904 | @ <td>%d(n) |
| 1905 | @ <td><a href='test-ftsdocs?y=%T(zDocType)&ixed=%d(idxed)'>listing</a> |
| 1906 | @ </tr> |
| 1907 | cnt += n; |
| 1908 | } |
| 1909 | db_finalize(&q); |
| 1910 | @ </tbody><tfoot> |
| 1911 | @ <tr><th>Total<th><th>%d(cnt)<th> |
| 1912 | @ </tfoot> |
| 1913 | @ </table> |
| 1914 | style_footer(); |
| 1915 | } |
| 1916 |