|
1
|
/* |
|
2
|
** Copyright (c) 2014 D. Richard Hipp |
|
3
|
** |
|
4
|
** This program is free software; you can redistribute it and/or |
|
5
|
** modify it under the terms of the Simplified BSD License (also |
|
6
|
** known as the "2-Clause License" or "FreeBSD License".) |
|
7
|
|
|
8
|
** This program is distributed in the hope that it will be useful, |
|
9
|
** but without any warranty; without even the implied warranty of |
|
10
|
** merchantability or fitness for a particular purpose. |
|
11
|
** |
|
12
|
** Author contact information: |
|
13
|
** [email protected] |
|
14
|
** http://www.hwaci.com/drh/ |
|
15
|
** |
|
16
|
******************************************************************************* |
|
17
|
** |
|
18
|
** This file contains code to implement a full-text search function in |
|
19
|
** Fossil using the FTS4 feature of SQLite. |
|
20
|
*/ |
|
21
|
#include "config.h" |
|
22
|
#include "ftsearch.h" |
|
23
|
#include <assert.h> |
|
24
|
|
|
25
|
/*
** Document Codes:
**
** A "document code" is a string that describes a particular document.
** The first letter is the document type.  The second letter is '-' (for
** human readability).  Subsequent letters are a unique identifier for
** the document.
**
**     c-RID       - Check-in comment
**     d-MID-FID   - Diff on file FID from checkin MID
**     e-TAGID     - Event text
**     f-FNID      - File content (most recent version)
**     t-TKTID     - Ticket text
**     w-TAGID     - Wiki page (most recent version)
**
** The FTSEARCHXREF table provides a mapping between document codes
** (in the FTSID column) and the DOCID of the FTS4 table.
*/
|
43
|
|
|
44
|
/* |
|
45
|
** Return a pointer to string that is the searchable content for a document. |
|
46
|
** Return NULL if the document does not exist or if there is an error. |
|
47
|
** |
|
48
|
** Memory to hold the string is obtained from fossil_malloc() and must be |
|
49
|
** released by the caller. |
|
50
|
** |
|
51
|
** If the second argument is not NULL, then use the second argument to get |
|
52
|
** the document identifier. If the second argument is NULL, then take the |
|
53
|
** document identifier from the 3rd and subsequent characters of the |
|
54
|
** document type. |
|
55
|
*/ |
|
56
|
char *ftsearch_content(const char *zDocType, const char *zDocId){ |
|
57
|
char *zRes = 0; /* The result to be returned */ |
|
58
|
int id; |
|
59
|
if( zDocId==0 ){ |
|
60
|
if( zDocType[0]==0 || zDocType[1]==0 ) return 0; |
|
61
|
zDocId = zDocType + 2; |
|
62
|
} |
|
63
|
id = atoi(zDocId); |
|
64
|
switch( zDocType[0] ){ |
|
65
|
case 'c': { /* A check-in comment. zDocId is the RID */ |
|
66
|
zRes = db_text(0, |
|
67
|
"SELECT coalesce(ecomment,comment) || char(10) ||" |
|
68
|
" 'user: ' || coalesce(euser,user) || char(10) ||" |
|
69
|
" 'branch: ' || coalesce((SELECT value FROM tagxref" |
|
70
|
" WHERE tagid=%d AND tagtype>0" |
|
71
|
" AND rid=%d),'trunk')" |
|
72
|
" FROM event" |
|
73
|
" WHERE event.objid=%d" |
|
74
|
" AND event.type GLOB 'c*'", |
|
75
|
TAG_BRANCH, id, id); |
|
76
|
break; |
|
77
|
} |
|
78
|
case 'f': { /* A file with zDocId as the filename.fnid */ |
|
79
|
Blob x; |
|
80
|
int rid; |
|
81
|
rid = db_int(0, |
|
82
|
"SELECT mlink.fid" |
|
83
|
" FROM filename, mlink, event" |
|
84
|
" WHERE filename.fnid=%d" |
|
85
|
" AND mlink.fnid=filename.fnid" |
|
86
|
" AND event.objid=mlink.mid" |
|
87
|
" ORDER BY event.mtime DESC LIMIT 1", id); |
|
88
|
blob_init(&x,0,0); |
|
89
|
if( rid>0 ){ |
|
90
|
content_get(rid, &x); |
|
91
|
} |
|
92
|
zRes = blob_str(&x); |
|
93
|
if( !blob_is_malloced(&x) ){ |
|
94
|
zRes = mprintf("%s", zRes); |
|
95
|
} |
|
96
|
break; |
|
97
|
} |
|
98
|
default: { |
|
99
|
/* No-op */ |
|
100
|
} |
|
101
|
} |
|
102
|
return zRes; |
|
103
|
} |
|
104
|
|
|
105
|
/*
** Return a human-readable description for the document described by
** the arguments.  Return NULL if the document code is NULL, is too
** short to contain an identifier, or names a document type that is not
** handled here.
**
** zDocType and zDocId are interpreted exactly as for ftsearch_content():
** when zDocId is NULL the identifier is taken from the 3rd and subsequent
** characters of zDocType.
**
** The result is built with mprintf() and must be released by the caller
** using fossil_free().
**
** The bLink parameter is accepted for interface compatibility but is not
** consulted by the current implementation.
*/
char *ftsearch_description(
  const char *zDocType,
  const char *zDocId,
  int bLink             /* Provide hyperlink in text if true */
){
  char *zRes = 0;       /* The result to be returned */
  int id;               /* Integer value of the document identifier */

  (void)bLink;          /* Not used by the current implementation */

  /* Callers pass column text straight from SQL, which can be NULL. */
  if( zDocType==0 ) return 0;

  if( zDocId==0 ){
    if( zDocType[0]==0 || zDocType[1]==0 ) return 0;
    zDocId = zDocType + 2;
  }
  id = atoi(zDocId);

  switch( zDocType[0] ){
    case 'c': {    /* A check-in comment.  zDocId is the RID */
      char *zUuid = db_text("","SELECT uuid FROM blob WHERE rid=%d", id);
      zRes = mprintf("Check-in [%S]", zUuid);
      fossil_free(zUuid);
      break;
    }
    case 'f': {    /* A file.  zDocId is the FNID */
      char *zName = db_text("","SELECT name FROM filename WHERE fnid=%d",id);
      zRes = mprintf("File %s", zName);
      fossil_free(zName);
      break;
    }
    default: {
      /* No-op: unsupported document types yield NULL */
    }
  }
  return zRes;
}
|
141
|
|
|
142
|
/* |
|
143
|
** COMMAND: test-ftsearch-content |
|
144
|
** |
|
145
|
** Usage: %fossil test-ftsearch-content DOCUMENTCODE |
|
146
|
** |
|
147
|
** Return the content for the given DOCUMENTCODE. This command is used |
|
148
|
** for testing and debugging the ftsearch_content() method in the |
|
149
|
** full-text search module. |
|
150
|
*/ |
|
151
|
void test_doc_content_cmd(void){ |
|
152
|
char *zContent = 0; |
|
153
|
char *zDesc = 0; |
|
154
|
db_find_and_open_repository(0, 0); |
|
155
|
verify_all_options(); |
|
156
|
if( g.argc!=3 ) usage("DOCUMENTCODE"); |
|
157
|
if( strlen(g.argv[2])>3 ){ |
|
158
|
zContent = ftsearch_content(g.argv[2],0); |
|
159
|
zDesc = ftsearch_description(g.argv[2],0,0); |
|
160
|
} |
|
161
|
if( zDesc ){ |
|
162
|
fossil_print("Description: %s\n", zDesc); |
|
163
|
fossil_free(zDesc); |
|
164
|
} |
|
165
|
if( zContent ){ |
|
166
|
fossil_print( |
|
167
|
"Content -------------------------------------------------------------\n" |
|
168
|
"%s\n" |
|
169
|
"---------------------------------------------------------------------\n", |
|
170
|
zContent); |
|
171
|
fossil_free(zContent); |
|
172
|
} |
|
173
|
} |
|
174
|
|
|
175
|
/* |
|
176
|
** Implementation of the ftsearch_content() SQL function. |
|
177
|
*/ |
|
178
|
static void ftsearch_content_sql_func( |
|
179
|
sqlite3_context *context, |
|
180
|
int argc, |
|
181
|
sqlite3_value **argv |
|
182
|
){ |
|
183
|
const char *zDocType; /* [cdeftw] */ |
|
184
|
const char *zDocId; /* Identifier based on zDocType */ |
|
185
|
char *zRes; /* Result */ |
|
186
|
|
|
187
|
zDocType = (const char*)sqlite3_value_text(argv[0]); |
|
188
|
zDocId = argc>=2 ? (const char*)sqlite3_value_text(argv[1]) : 0; |
|
189
|
zRes = ftsearch_content(zDocType, zDocId); |
|
190
|
if( zRes ){ |
|
191
|
sqlite3_result_text(context, zRes, -1, (void(*)(void*))fossil_free); |
|
192
|
} |
|
193
|
} |
|
194
|
|
|
195
|
/* |
|
196
|
** Invoke this routine in order to install the ftsearch_content() SQL |
|
197
|
** function on an SQLite database connection. |
|
198
|
** |
|
199
|
** sqlite3_auto_extension(ftsearch_add_sql_func); |
|
200
|
** |
|
201
|
** to cause this extension to be automatically loaded into each new |
|
202
|
** database connection. |
|
203
|
*/ |
|
204
|
int ftsearch_add_sql_func(sqlite3 *db){ |
|
205
|
int rc; |
|
206
|
rc = sqlite3_create_function(db, "ftsearch_content", 1, SQLITE_UTF8, 0, |
|
207
|
ftsearch_content_sql_func, 0, 0); |
|
208
|
if( rc==SQLITE_OK ){ |
|
209
|
rc = sqlite3_create_function(db, "ftsearch_content", 2, SQLITE_UTF8, 0, |
|
210
|
ftsearch_content_sql_func, 0, 0); |
|
211
|
} |
|
212
|
return rc; |
|
213
|
} |
|
214
|
|
|
215
|
/* |
|
216
|
** Delete the ftsearch tables, views, and indexes |
|
217
|
*/ |
|
218
|
void ftsearch_disable_all(void){ |
|
219
|
Stmt q; |
|
220
|
Blob sql; |
|
221
|
db_begin_transaction(); |
|
222
|
db_prepare(&q, |
|
223
|
"SELECT type, name FROM %s.sqlite_master" |
|
224
|
" WHERE type IN ('table','view')" |
|
225
|
" AND name GLOB 'ftsearch*'" |
|
226
|
" AND name NOT GLOB 'ftsearch_*'", |
|
227
|
db_name("repository") |
|
228
|
); |
|
229
|
blob_init(&sql, 0, 0); |
|
230
|
while( db_step(&q)==SQLITE_ROW ){ |
|
231
|
blob_appendf(&sql, "DROP %s IF EXISTS \"%w\";\n", |
|
232
|
db_column_text(&q,0), db_column_text(&q,1)); |
|
233
|
} |
|
234
|
db_finalize(&q); |
|
235
|
db_multi_exec("%s", blob_str(&sql)/*safe-for-%s*/); |
|
236
|
blob_reset(&sql); |
|
237
|
db_end_transaction(0); |
|
238
|
} |
|
239
|
|
|
240
|
/*
** Completely rebuild the ftsearch indexes from scratch.
**
** Any existing search tables are removed first.  The set of document
** categories to index is then read from the "search-index-type" setting,
** which is the name written by "fossil search-config settings index-type".
** If that setting is undefined, the older "ftsearch-index-type" name is
** consulted, and if that is undefined too, all categories ("cdeftw") are
** assumed.
**
** If no category is enabled the routine stops after removing the old
** tables.  Otherwise it creates the FTSEARCHXREF table, the FTSEARCHBODY
** view and the FTSEARCH virtual table, and indexes every document.  Only
** the 'c' (check-in comment) and 'f' (file content) categories are
** populated by this routine.
**
** All work is done inside a single transaction, which is closed on
** every return path.
*/
void ftsearch_rebuild_all(void){
  const char *zEnables;    /* Enabled categories: letters from [cdeftw] */

  db_begin_transaction();
  ftsearch_disable_all();

  /* Prefer the setting name used by the search-config command, and fall
  ** back to the legacy name so that existing configurations keep working.
  */
  zEnables = db_get("search-index-type", 0);
  if( zEnables==0 ){
    zEnables = db_get("ftsearch-index-type", "cdeftw");
  }

  /* If none of the search categories are enabled, then do not
  ** bother constructing the search tables.  The transaction must still
  ** be closed so that begin/end calls stay balanced.
  */
  if( sqlite3_strglob("*[cdeftw]*", zEnables) ){
    db_end_transaction(0);
    return;
  }

  /* The FTSEARCHXREF table provides a mapping between the integer
  ** document-ids in FTS4 to the "document codes" that describe a
  ** referenced object
  */
  db_multi_exec(
    "CREATE TABLE %s.ftsearchxref(\n"
    " docid INTEGER PRIMARY KEY,\n"    /* Link to ftsearch.docid */
    " ftsid TEXT UNIQUE,\n"            /* The document code */
    " mtime DATE\n"                    /* Timestamp on this object */
    ");\n",
    db_name("repository")
  );

  /* The FTSEARCHBODY view provides the content for the FTS4 table
  */
  db_multi_exec(
    "CREATE VIEW %s.ftsearchbody AS"
    " SELECT docid AS rowid, ftsearch_content(ftsid) AS body"
    " FROM ftsearchxref;\n",
    db_name("repository")
  );

  /* This is the FTS4 table used for searching.
  ** Make use of an undocumented feature of the FTS4.simple tokenizer
  ** that the second argument is a list of separator characters.  Use
  ** this to make "_" not be a separator so that identifiers that contain
  ** "_" are not split apart.
  */
  {
    char zSep[129];   /* Every ASCII character that separates tokens */
    int i, j;

    /* Collect every character in the range 0x01..0x7f that is neither
    ** alphanumeric nor '_'. */
    for(i=0, j=1; j<0x80; j++){
      if( j=='_' || fossil_isalnum(j) ) continue;
      zSep[i++] = (char)j;
    }
    zSep[i] = 0;
    db_multi_exec(
      "CREATE VIRTUAL TABLE %s.ftsearch USING fts4("
      "body,"
      "tokenize=simple \"\" \"%w\","
      "content='ftsearchbody');",
      db_name("repository"), zSep
    );
  }

  if( strchr(zEnables, 'c')!=0 ){
    /* Populate the FTSEARCHXREF table with references to all check-in
    ** comments currently in the event table
    */
    db_multi_exec(
      "INSERT INTO ftsearchxref(ftsid,mtime)"
      " SELECT 'c-' || objid, mtime FROM event"
      " WHERE type='ci';"
    );
  }

  if( strchr(zEnables, 'f')!=0 ){
    /* Populate the FTSEARCHXREF table with references to all files
    ** whose names match the "search-file-glob" setting.  The timestamp
    ** of a file is the largest mtime of any event that references it.
    */
    db_multi_exec(
      "INSERT INTO ftsearchxref(ftsid,mtime)"
      " SELECT 'f-' || filename.fnid, max(event.mtime)"
      " FROM filename, mlink, event"
      " WHERE mlink.fnid=filename.fnid"
      " AND event.objid=mlink.mid"
      " AND %s"
      " GROUP BY 1",
      glob_expr("filename.name", db_get("search-file-glob","*"))
    );
  }

  /* Index every document mentioned in the FTSEARCHXREF table */
  db_multi_exec(
    "INSERT INTO ftsearch(docid,body)"
    " SELECT docid, ftsearch_content(ftsid) FROM ftsearchxref;"
  );
  db_end_transaction(0);
}
|
331
|
|
|
332
|
/* |
|
333
|
** COMMAND: search-config |
|
334
|
** |
|
335
|
** Usage: %fossil search PATTERN |
|
336
|
** %fossil search-config SUBCOMMAND .... |
|
337
|
** |
|
338
|
** The "search" command locates resources that contain the given web-search |
|
339
|
** style PATTERN. This only works if the repository has be configured to |
|
340
|
** enable searching. |
|
341
|
** |
|
342
|
** The "search-config" is used to setup the search feature of the repository. |
|
343
|
** Subcommands are: |
|
344
|
** |
|
345
|
** fossil search-config doclist |
|
346
|
** |
|
347
|
** List all the documents currently indexed |
|
348
|
** |
|
349
|
** fossil search-config rebuild |
|
350
|
** |
|
351
|
** Completely rebuild the search index. |
|
352
|
** |
|
353
|
** fossil search-config reset |
|
354
|
** |
|
355
|
** Disable search and remove the search indexes from the repository. |
|
356
|
** |
|
357
|
** fossil search-config setting NAME ?VALUE? |
|
358
|
** |
|
359
|
** Set or query a search setting. NAMES are: |
|
360
|
** file-glob Comma-separated list of GLOBs for file search |
|
361
|
** ticket-expr SQL expression to render TICKET content |
|
362
|
** ticketchng-expr SQL expression to render TICKETCHNG content |
|
363
|
** index-type Zero or more characters from [cdeftw] |
|
364
|
** |
|
365
|
** The index-type determines what resources are indexed and available for |
|
366
|
** searching. If the index-type is an empty string, the search is |
|
367
|
** complete disabled. These are the valid index-types: |
|
368
|
** c: check-in comments |
|
369
|
** d: check-in difference marks |
|
370
|
** e: event text |
|
371
|
** f: file text (subject to the file-glob) |
|
372
|
** t: ticket text (requires ticket-expr and ticketchng-expr) |
|
373
|
** w: wiki pages |
|
374
|
** |
|
375
|
** It is necessary to run "fossil search-config rebuild" after making |
|
376
|
** setting changes in order to reconstruct the search index |
|
377
|
** |
|
378
|
** fossil search-config status |
|
379
|
** |
|
380
|
** Report on the status of the search configuration. |
|
381
|
*/ |
|
382
|
void ftsearch_cmd(void){ |
|
383
|
static const char *azSettings[] = { |
|
384
|
"file-glob", "index-type", "ticket-expr", "ticketchng-expr" |
|
385
|
}; |
|
386
|
const char *zSubCmd; |
|
387
|
int nSubCmd; |
|
388
|
db_find_and_open_repository(0, 0); |
|
389
|
verify_all_options(); |
|
390
|
if( g.argc<3 ) usage("search PATTERN"); |
|
391
|
zSubCmd = g.argv[2]; |
|
392
|
nSubCmd = (int)strlen(zSubCmd); |
|
393
|
db_begin_transaction(); |
|
394
|
if( strlen(g.argv[1])<=6 && g.argc==3 ){ |
|
395
|
/* This must be the "fossil search PATTERN" command */ |
|
396
|
Stmt q; |
|
397
|
int i = 0; |
|
398
|
#ifdef _WIN32 |
|
399
|
const char *zMark1 = "*"; |
|
400
|
const char *zMark2 = "*"; |
|
401
|
#else |
|
402
|
const char *zMark1 = "\033[1m"; |
|
403
|
const char *zMark2 = "\033[0m"; |
|
404
|
#endif |
|
405
|
if( !db_table_exists("repository","ftsearch") ){ |
|
406
|
fossil_fatal("search is disabled - see \"fossil help search\"" |
|
407
|
" for more information"); |
|
408
|
} |
|
409
|
db_prepare(&q, "SELECT " |
|
410
|
" snippet(ftsearch,%Q,%Q,'...')," |
|
411
|
" ftsearchxref.ftsid," |
|
412
|
" date(ftsearchxref.mtime)" |
|
413
|
" FROM ftsearch, ftsearchxref" |
|
414
|
" WHERE ftsearch.body MATCH %Q" |
|
415
|
" AND ftsearchxref.docid=ftsearch.docid" |
|
416
|
" ORDER BY ftsearchxref.mtime DESC LIMIT 50;", |
|
417
|
zMark1, zMark2, zSubCmd); |
|
418
|
while( db_step(&q)==SQLITE_ROW ){ |
|
419
|
const char *zSnippet = db_column_text(&q,0); |
|
420
|
char *zDesc = ftsearch_description(db_column_text(&q,1),0,0); |
|
421
|
const char *zDate = db_column_text(&q,2); |
|
422
|
if( i++ > 0 ){ |
|
423
|
fossil_print("----------------------------------------------------\n"); |
|
424
|
} |
|
425
|
fossil_print("%s (%s)\n%s\n", zDesc, zDate, zSnippet); |
|
426
|
fossil_free(zDesc); |
|
427
|
} |
|
428
|
db_finalize(&q); |
|
429
|
}else if( strncmp(zSubCmd, "doclist", nSubCmd)==0 ){ |
|
430
|
if( db_table_exists("repository","ftsearch") ){ |
|
431
|
Stmt q; |
|
432
|
db_prepare(&q, "SELECT ftsid, date(mtime) FROM ftsearchxref" |
|
433
|
" ORDER BY mtime DESC"); |
|
434
|
while( db_step(&q)==SQLITE_ROW ){ |
|
435
|
const char *zDate = db_column_text(&q,1); |
|
436
|
const char *zFtsid = db_column_text(&q,0); |
|
437
|
char *zDesc = ftsearch_description(zFtsid,0,0); |
|
438
|
fossil_print("%s (%s)\n", zDesc, zDate); |
|
439
|
fossil_free(zDesc); |
|
440
|
} |
|
441
|
db_finalize(&q); |
|
442
|
} |
|
443
|
}else if( strncmp(zSubCmd, "rebuild", nSubCmd)==0 ){ |
|
444
|
ftsearch_rebuild_all(); |
|
445
|
}else if( strncmp(zSubCmd, "reset", nSubCmd)==0 ){ |
|
446
|
ftsearch_disable_all(); |
|
447
|
}else if( strncmp(zSubCmd, "settings", nSubCmd)==0 ){ |
|
448
|
const char *zName = g.argv[3]; |
|
449
|
const char *zValue = g.argc>=5 ? g.argv[4] : 0; |
|
450
|
char *zFullname; |
|
451
|
int i; |
|
452
|
if( g.argc<4 ) usage("setting NAME ?VALUE?"); |
|
453
|
for(i=0; i<count(azSettings); i++){ |
|
454
|
if( strcmp(zName, azSettings[i])==0 ) break; |
|
455
|
} |
|
456
|
if( i>=count(azSettings) ){ |
|
457
|
Blob x; |
|
458
|
blob_init(&x,0,0); |
|
459
|
for(i=0; i<count(azSettings); i++) blob_appendf(&x," %s", azSettings[i]); |
|
460
|
fossil_fatal("unknown setting \"%s\" - should be one of:%s", |
|
461
|
zName, blob_str(&x)); |
|
462
|
} |
|
463
|
zFullname = mprintf("search-%s", zName); |
|
464
|
if( zValue==0 ){ |
|
465
|
zValue = db_get(zFullname, 0); |
|
466
|
}else{ |
|
467
|
db_set(zFullname, zValue, 0); |
|
468
|
} |
|
469
|
if( zValue==0 ){ |
|
470
|
fossil_print("%s is not defined\n", zName); |
|
471
|
}else{ |
|
472
|
fossil_print("%s: %s\n", zName, zValue); |
|
473
|
} |
|
474
|
}else if( strncmp(zSubCmd, "status", nSubCmd)==0 ){ |
|
475
|
int i; |
|
476
|
fossil_print("search settings:\n"); |
|
477
|
for(i=0; i<count(azSettings); i++){ |
|
478
|
char *zFullname = mprintf("search-%s", azSettings[i]); |
|
479
|
char *zValue = db_get(zFullname, 0); |
|
480
|
if( zValue==0 ){ |
|
481
|
fossil_print(" %s is undefined\n", azSettings[i]); |
|
482
|
}else{ |
|
483
|
fossil_print(" %s: %s\n", azSettings[i], zValue); |
|
484
|
} |
|
485
|
fossil_free(zFullname); |
|
486
|
} |
|
487
|
if( db_table_exists("repository","ftsearchxref") ){ |
|
488
|
int n = db_int(0, "SELECT count(*) FROM ftsearchxref"); |
|
489
|
fossil_print("search is enabled with %d documents indexed\n", n); |
|
490
|
}else{ |
|
491
|
fossil_print("search is disabled\n"); |
|
492
|
} |
|
493
|
} |
|
494
|
db_end_transaction(0); |
|
495
|
} |
|
496
|
|