Fossil SCM

fossil-scm / src / ftsearch.c
Blame History Raw 496 lines
1
/*
2
** Copyright (c) 2014 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
** [email protected]
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file contains code to implement a full-text search function in
19
** Fossil using the FTS4 feature of SQLite.
20
*/
21
#include "config.h"
22
#include "ftsearch.h"
23
#include <assert.h>
24
25
/*
26
** Document Codes:
27
**
28
** A "document code" is a string that describes a particular document.
29
** The first letter is the document type. Second letter is '-' (for
30
** human readability).  Subsequent letters are a unique identifier for
31
** the document.
32
**
33
** c-RID - Check-in comment
34
** d-MID-FID - Diff on file FID from checkin MID
35
** e-TAGID - Event text
36
** f-FNID - File content (most recent version)
37
** t-TKTID - Ticket text
38
** w-TAGID - Wiki page (most recent version)
39
**
40
** The FTSEARCHXREF table provides a mapping between document codes
41
** (in the FTSID column) to the DOCID of the FTS4 table.
42
*/
43
44
/*
45
** Return a pointer to string that is the searchable content for a document.
46
** Return NULL if the document does not exist or if there is an error.
47
**
48
** Memory to hold the string is obtained from fossil_malloc() and must be
49
** released by the caller.
50
**
51
** If the second argument is not NULL, then use the second argument to get
52
** the document identifier. If the second argument is NULL, then take the
53
** document identifier from the 3rd and subsequent characters of the
54
** document type.
55
*/
56
char *ftsearch_content(const char *zDocType, const char *zDocId){
57
char *zRes = 0; /* The result to be returned */
58
int id;
59
if( zDocId==0 ){
60
if( zDocType[0]==0 || zDocType[1]==0 ) return 0;
61
zDocId = zDocType + 2;
62
}
63
id = atoi(zDocId);
64
switch( zDocType[0] ){
65
case 'c': { /* A check-in comment. zDocId is the RID */
66
zRes = db_text(0,
67
"SELECT coalesce(ecomment,comment) || char(10) ||"
68
" 'user: ' || coalesce(euser,user) || char(10) ||"
69
" 'branch: ' || coalesce((SELECT value FROM tagxref"
70
" WHERE tagid=%d AND tagtype>0"
71
" AND rid=%d),'trunk')"
72
" FROM event"
73
" WHERE event.objid=%d"
74
" AND event.type GLOB 'c*'",
75
TAG_BRANCH, id, id);
76
break;
77
}
78
case 'f': { /* A file with zDocId as the filename.fnid */
79
Blob x;
80
int rid;
81
rid = db_int(0,
82
"SELECT mlink.fid"
83
" FROM filename, mlink, event"
84
" WHERE filename.fnid=%d"
85
" AND mlink.fnid=filename.fnid"
86
" AND event.objid=mlink.mid"
87
" ORDER BY event.mtime DESC LIMIT 1", id);
88
blob_init(&x,0,0);
89
if( rid>0 ){
90
content_get(rid, &x);
91
}
92
zRes = blob_str(&x);
93
if( !blob_is_malloced(&x) ){
94
zRes = mprintf("%s", zRes);
95
}
96
break;
97
}
98
default: {
99
/* No-op */
100
}
101
}
102
return zRes;
103
}
104
105
/*
** Return a short human-readable label for the document identified by
** the arguments.  NULL is returned if zDocType is NULL, if no
** identifier can be extracted from it, or if the document type is not
** one handled below.
**
** If zDocId is not NULL, the numeric identifier is parsed from zDocId.
** If zDocId is NULL, zDocType is taken to be a complete document code
** (for example "f-17") and the identifier is parsed beginning with its
** third character.
**
**    'c'   "Check-in [HASH]" using the uuid of blob.rid==id
**    'f'   "File NAME" using the name of filename.fnid==id
**
** The bLink parameter is accepted but not currently consulted by this
** routine.
**
** The returned string comes from mprintf() and must be released by the
** caller using fossil_free().
*/
char *ftsearch_description(
  const char *zDocType,
  const char *zDocId,
  int bLink               /* Provide hyperlink in text if true */
){
  char *zRes = 0;    /* The result to be returned */
  int id;            /* Numeric identifier parsed from the document code */

  /* The document code may originate from a nullable database column.
  ** A NULL code describes nothing. */
  if( zDocType==0 ) return 0;

  if( zDocId==0 ){
    /* Need at least the type letter and the separator before the id */
    if( zDocType[0]==0 || zDocType[1]==0 ) return 0;
    zDocId = zDocType + 2;
  }
  id = atoi(zDocId);
  switch( zDocType[0] ){
    case 'c': {    /* A check-in comment. zDocId is the RID */
      /* An unknown RID yields the "" default, hence "Check-in []" */
      char *zUuid = db_text("","SELECT uuid FROM blob WHERE rid=%d", id);
      zRes = mprintf("Check-in [%S]", zUuid);
      fossil_free(zUuid);
      break;
    }
    case 'f': {    /* A file. zDocId is the FNID */
      char *zName = db_text("","SELECT name FROM filename WHERE fnid=%d",id);
      zRes = mprintf("File %s", zName);
      fossil_free(zName);
      break;
    }
    default: {
      /* Unhandled document type: leave zRes as NULL */
    }
  }
  return zRes;
}
141
142
/*
143
** COMMAND: test-ftsearch-content
144
**
145
** Usage: %fossil test-ftsearch-content DOCUMENTCODE
146
**
147
** Return the content for the given DOCUMENTCODE. This command is used
148
** for testing and debugging the ftsearch_content() method in the
149
** full-text search module.
150
*/
151
void test_doc_content_cmd(void){
152
char *zContent = 0;
153
char *zDesc = 0;
154
db_find_and_open_repository(0, 0);
155
verify_all_options();
156
if( g.argc!=3 ) usage("DOCUMENTCODE");
157
if( strlen(g.argv[2])>3 ){
158
zContent = ftsearch_content(g.argv[2],0);
159
zDesc = ftsearch_description(g.argv[2],0,0);
160
}
161
if( zDesc ){
162
fossil_print("Description: %s\n", zDesc);
163
fossil_free(zDesc);
164
}
165
if( zContent ){
166
fossil_print(
167
"Content -------------------------------------------------------------\n"
168
"%s\n"
169
"---------------------------------------------------------------------\n",
170
zContent);
171
fossil_free(zContent);
172
}
173
}
174
175
/*
176
** Implementation of the ftsearch_content() SQL function.
177
*/
178
static void ftsearch_content_sql_func(
179
sqlite3_context *context,
180
int argc,
181
sqlite3_value **argv
182
){
183
const char *zDocType; /* [cdeftw] */
184
const char *zDocId; /* Identifier based on zDocType */
185
char *zRes; /* Result */
186
187
zDocType = (const char*)sqlite3_value_text(argv[0]);
188
zDocId = argc>=2 ? (const char*)sqlite3_value_text(argv[1]) : 0;
189
zRes = ftsearch_content(zDocType, zDocId);
190
if( zRes ){
191
sqlite3_result_text(context, zRes, -1, (void(*)(void*))fossil_free);
192
}
193
}
194
195
/*
196
** Invoke this routine in order to install the ftsearch_content() SQL
197
** function on an SQLite database connection.
198
**
199
** sqlite3_auto_extension(ftsearch_add_sql_func);
200
**
201
** to cause this extension to be automatically loaded into each new
202
** database connection.
203
*/
204
int ftsearch_add_sql_func(sqlite3 *db){
205
int rc;
206
rc = sqlite3_create_function(db, "ftsearch_content", 1, SQLITE_UTF8, 0,
207
ftsearch_content_sql_func, 0, 0);
208
if( rc==SQLITE_OK ){
209
rc = sqlite3_create_function(db, "ftsearch_content", 2, SQLITE_UTF8, 0,
210
ftsearch_content_sql_func, 0, 0);
211
}
212
return rc;
213
}
214
215
/*
216
** Delete the ftsearch tables, views, and indexes
217
*/
218
void ftsearch_disable_all(void){
219
Stmt q;
220
Blob sql;
221
db_begin_transaction();
222
db_prepare(&q,
223
"SELECT type, name FROM %s.sqlite_master"
224
" WHERE type IN ('table','view')"
225
" AND name GLOB 'ftsearch*'"
226
" AND name NOT GLOB 'ftsearch_*'",
227
db_name("repository")
228
);
229
blob_init(&sql, 0, 0);
230
while( db_step(&q)==SQLITE_ROW ){
231
blob_appendf(&sql, "DROP %s IF EXISTS \"%w\";\n",
232
db_column_text(&q,0), db_column_text(&q,1));
233
}
234
db_finalize(&q);
235
db_multi_exec("%s", blob_str(&sql)/*safe-for-%s*/);
236
blob_reset(&sql);
237
db_end_transaction(0);
238
}
239
240
/*
** Completely rebuild the ftsearch indexes from scratch.
**
** Any existing search tables are dropped first.  If the index-type
** setting enables none of the categories [cdeftw], nothing further is
** created, which leaves search disabled.  Otherwise the FTSEARCHXREF
** table, the FTSEARCHBODY view and the FTSEARCH FTS4 table are created
** and populated for the enabled categories.  Only 'c' (check-in
** comments) and 'f' (file content) are populated by this routine.
**
** All of the work happens inside a single transaction.
*/
void ftsearch_rebuild_all(void){
  const char *zEnables;     /* Enabled categories, letters from [cdeftw] */
  db_begin_transaction();
  ftsearch_disable_all();

  /* "fossil search-config settings index-type VALUE" stores the value
  ** under the name "search-index-type", so look there first.  Fall back
  ** to the legacy "ftsearch-index-type" name, and finally to enabling
  ** every category.
  */
  zEnables = db_get("search-index-type", 0);
  if( zEnables==0 ){
    zEnables = db_get("ftsearch-index-type", "cdeftw");
  }

  /* If none of the search categories are enabled, then do not
  ** bother constructing the search tables.  sqlite3_strglob() returns
  ** non-zero when the string does NOT match.  The transaction opened
  ** above must still be closed so that the drops are committed and the
  ** nesting level stays balanced.
  */
  if( sqlite3_strglob("*[cdeftw]*", zEnables) ){
    db_end_transaction(0);
    return;
  }

  /* The FTSEARCHXREF table provides a mapping between the integer
  ** document-ids in FTS4 to the "document codes" that describe a
  ** referenced object
  */
  db_multi_exec(
    "CREATE TABLE %s.ftsearchxref(\n"
    " docid INTEGER PRIMARY KEY,\n"    /* Link to ftsearch.docid */
    " ftsid TEXT UNIQUE,\n"            /* The document code */
    " mtime DATE\n"                    /* Timestamp on this object */
    ");\n",
    db_name("repository")
  );

  /* The FTSEARCHBODY view provides the content for the FTS4 table
  */
  db_multi_exec(
    "CREATE VIEW %s.ftsearchbody AS"
    " SELECT docid AS rowid, ftsearch_content(ftsid) AS body"
    " FROM ftsearchxref;\n",
    db_name("repository")
  );

  /* This is the FTS4 table used for searching.
  ** Make use of an undocumented feature of the FTS4.simple tokenizer
  ** that the second argument is a list of separator characters.  Use
  ** this to make "_" not be a separator so that identifiers that contain
  ** "_" are not split apart.
  */
  {
    char zSep[129];   /* Every 7-bit character except '_' and alphanumerics */
    int i, j;
    for(i=0, j=1; j<0x80; j++){
      if( j=='_' || fossil_isalnum(j) ) continue;
      zSep[i++] = j;
    }
    zSep[i] = 0;
    db_multi_exec(
      "CREATE VIRTUAL TABLE %s.ftsearch USING fts4("
      "body,"
      "tokenize=simple \"\" \"%w\","
      "content='ftsearchbody');",
      db_name("repository"), zSep
    );
  }
  if( strchr(zEnables, 'c')!=0 ){
    /* Populate the FTSEARCHXREF table with references to all check-in
    ** comments currently in the event table
    */
    db_multi_exec(
      "INSERT INTO ftsearchxref(ftsid,mtime)"
      " SELECT 'c-' || objid, mtime FROM event"
      " WHERE type='ci';"
    );
  }

  if( strchr(zEnables, 'f')!=0 ){
    /* Populate the FTSEARCHXREF table with references to all files
    ** whose names match the search-file-glob setting.  Each file is
    ** stamped with the newest mtime of any check-in that touches it.
    */
    db_multi_exec(
      "INSERT INTO ftsearchxref(ftsid,mtime)"
      " SELECT 'f-' || filename.fnid, max(event.mtime)"
      " FROM filename, mlink, event"
      " WHERE mlink.fnid=filename.fnid"
      " AND event.objid=mlink.mid"
      " AND %s"
      " GROUP BY 1",
      glob_expr("filename.name", db_get("search-file-glob","*"))
    );
  }

  /* Index every document mentioned in the FTSEARCHXREF table */
  db_multi_exec(
    "INSERT INTO ftsearch(docid,body)"
    " SELECT docid, ftsearch_content(ftsid) FROM ftsearchxref;"
  );
  db_end_transaction(0);
}
331
332
/*
333
** COMMAND: search-config
334
**
335
** Usage: %fossil search PATTERN
336
** %fossil search-config SUBCOMMAND ....
337
**
338
** The "search" command locates resources that contain the given web-search
339
** style PATTERN. This only works if the repository has been configured to
340
** enable searching.
341
**
342
** The "search-config" is used to setup the search feature of the repository.
343
** Subcommands are:
344
**
345
** fossil search-config doclist
346
**
347
** List all the documents currently indexed
348
**
349
** fossil search-config rebuild
350
**
351
** Completely rebuild the search index.
352
**
353
** fossil search-config reset
354
**
355
** Disable search and remove the search indexes from the repository.
356
**
357
** fossil search-config setting NAME ?VALUE?
358
**
359
** Set or query a search setting. NAMES are:
360
** file-glob Comma-separated list of GLOBs for file search
361
** ticket-expr SQL expression to render TICKET content
362
** ticketchng-expr SQL expression to render TICKETCHNG content
363
** index-type Zero or more characters from [cdeftw]
364
**
365
** The index-type determines what resources are indexed and available for
366
** searching. If the index-type is an empty string, the search is
367
** completely disabled. These are the valid index-types:
368
** c: check-in comments
369
** d: check-in difference marks
370
** e: event text
371
** f: file text (subject to the file-glob)
372
** t: ticket text (requires ticket-expr and ticketchng-expr)
373
** w: wiki pages
374
**
375
** It is necessary to run "fossil search-config rebuild" after making
376
** setting changes in order to reconstruct the search index
377
**
378
** fossil search-config status
379
**
380
** Report on the status of the search configuration.
381
*/
382
void ftsearch_cmd(void){
383
static const char *azSettings[] = {
384
"file-glob", "index-type", "ticket-expr", "ticketchng-expr"
385
};
386
const char *zSubCmd;
387
int nSubCmd;
388
db_find_and_open_repository(0, 0);
389
verify_all_options();
390
if( g.argc<3 ) usage("search PATTERN");
391
zSubCmd = g.argv[2];
392
nSubCmd = (int)strlen(zSubCmd);
393
db_begin_transaction();
394
if( strlen(g.argv[1])<=6 && g.argc==3 ){
395
/* This must be the "fossil search PATTERN" command */
396
Stmt q;
397
int i = 0;
398
#ifdef _WIN32
399
const char *zMark1 = "*";
400
const char *zMark2 = "*";
401
#else
402
const char *zMark1 = "\033[1m";
403
const char *zMark2 = "\033[0m";
404
#endif
405
if( !db_table_exists("repository","ftsearch") ){
406
fossil_fatal("search is disabled - see \"fossil help search\""
407
" for more information");
408
}
409
db_prepare(&q, "SELECT "
410
" snippet(ftsearch,%Q,%Q,'...'),"
411
" ftsearchxref.ftsid,"
412
" date(ftsearchxref.mtime)"
413
" FROM ftsearch, ftsearchxref"
414
" WHERE ftsearch.body MATCH %Q"
415
" AND ftsearchxref.docid=ftsearch.docid"
416
" ORDER BY ftsearchxref.mtime DESC LIMIT 50;",
417
zMark1, zMark2, zSubCmd);
418
while( db_step(&q)==SQLITE_ROW ){
419
const char *zSnippet = db_column_text(&q,0);
420
char *zDesc = ftsearch_description(db_column_text(&q,1),0,0);
421
const char *zDate = db_column_text(&q,2);
422
if( i++ > 0 ){
423
fossil_print("----------------------------------------------------\n");
424
}
425
fossil_print("%s (%s)\n%s\n", zDesc, zDate, zSnippet);
426
fossil_free(zDesc);
427
}
428
db_finalize(&q);
429
}else if( strncmp(zSubCmd, "doclist", nSubCmd)==0 ){
430
if( db_table_exists("repository","ftsearch") ){
431
Stmt q;
432
db_prepare(&q, "SELECT ftsid, date(mtime) FROM ftsearchxref"
433
" ORDER BY mtime DESC");
434
while( db_step(&q)==SQLITE_ROW ){
435
const char *zDate = db_column_text(&q,1);
436
const char *zFtsid = db_column_text(&q,0);
437
char *zDesc = ftsearch_description(zFtsid,0,0);
438
fossil_print("%s (%s)\n", zDesc, zDate);
439
fossil_free(zDesc);
440
}
441
db_finalize(&q);
442
}
443
}else if( strncmp(zSubCmd, "rebuild", nSubCmd)==0 ){
444
ftsearch_rebuild_all();
445
}else if( strncmp(zSubCmd, "reset", nSubCmd)==0 ){
446
ftsearch_disable_all();
447
}else if( strncmp(zSubCmd, "settings", nSubCmd)==0 ){
448
const char *zName = g.argv[3];
449
const char *zValue = g.argc>=5 ? g.argv[4] : 0;
450
char *zFullname;
451
int i;
452
if( g.argc<4 ) usage("setting NAME ?VALUE?");
453
for(i=0; i<count(azSettings); i++){
454
if( strcmp(zName, azSettings[i])==0 ) break;
455
}
456
if( i>=count(azSettings) ){
457
Blob x;
458
blob_init(&x,0,0);
459
for(i=0; i<count(azSettings); i++) blob_appendf(&x," %s", azSettings[i]);
460
fossil_fatal("unknown setting \"%s\" - should be one of:%s",
461
zName, blob_str(&x));
462
}
463
zFullname = mprintf("search-%s", zName);
464
if( zValue==0 ){
465
zValue = db_get(zFullname, 0);
466
}else{
467
db_set(zFullname, zValue, 0);
468
}
469
if( zValue==0 ){
470
fossil_print("%s is not defined\n", zName);
471
}else{
472
fossil_print("%s: %s\n", zName, zValue);
473
}
474
}else if( strncmp(zSubCmd, "status", nSubCmd)==0 ){
475
int i;
476
fossil_print("search settings:\n");
477
for(i=0; i<count(azSettings); i++){
478
char *zFullname = mprintf("search-%s", azSettings[i]);
479
char *zValue = db_get(zFullname, 0);
480
if( zValue==0 ){
481
fossil_print(" %s is undefined\n", azSettings[i]);
482
}else{
483
fossil_print(" %s: %s\n", azSettings[i], zValue);
484
}
485
fossil_free(zFullname);
486
}
487
if( db_table_exists("repository","ftsearchxref") ){
488
int n = db_int(0, "SELECT count(*) FROM ftsearchxref");
489
fossil_print("search is enabled with %d documents indexed\n", n);
490
}else{
491
fossil_print("search is disabled\n");
492
}
493
}
494
db_end_transaction(0);
495
}
496

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button