Fossil SCM

fossil-scm / src / search.c
Blame History Raw 2841 lines
1
/*
2
** Copyright (c) 2009 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
** [email protected]
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file contains code to implement search functions
19
** against timeline comments, check-in content, wiki pages, tickets,
20
** and/or forum posts.
21
**
22
** The search can be either a per-query "grep"-like search that scans
23
** the entire corpus. Or it can use the FTS5 search engine of SQLite.
24
** The choice is an administrator configuration option.
25
**
26
** The first option is referred to as "full-scan search". The second
27
** option is called "indexed search".
28
**
29
** The code in this file is ordered approximately as follows:
30
**
31
** (1) The full-scan search engine
32
** (2) The indexed search engine
33
** (3) Higher level interfaces that use either (1) or (2) according
34
** to the current search configuration settings
35
*/
36
#include "config.h"
37
#include "search.h"
38
#include <assert.h>
39
40
#if INTERFACE

/* Upper bound on the number of terms a full-scan pattern may hold */
#define SEARCH_MAX_TERM   8

/*
** State of one compiled full-scan search: the tokenized pattern, the
** strings used to decorate snippets, and the outcome of the most
** recent match attempt.
*/
struct Search {
  int nTerm;               /* Count of valid entries in a[] */
  struct srchTerm {        /* One entry per term of the pattern */
    char *z;                 /* First byte of the term's text */
    int n;                   /* Size of the term in bytes */
  } a[SEARCH_MAX_TERM];
  /* Strings and flags that control snippet generation */
  char *zPattern;          /* Private copy of the search pattern */
  char *zMarkBegin;        /* Emitted just before matched text */
  char *zMarkEnd;          /* Emitted just after matched text */
  char *zMarkGap;          /* Emitted where text has been elided */
  unsigned fSrchFlg;       /* Mask of SRCHFLG_* values */
  int iScore;              /* Result of the latest match attempt */
  Blob snip;               /* Snippet built by the latest match */
};

#define SRCHFLG_HTML    0x01  /* HTML-escape the text placed in snippets */
#define SRCHFLG_STATIC  0x04  /* Operate on the static gSearch object */

#endif
68
69
/*
70
** There is a single global Search object:
71
*/
72
static Search gSearch;
73
74
75
/*
** Lookup table, indexed by byte value: 1 if the byte is a word
** boundary, 0 if it is part of a word.  Word bytes are the ASCII
** digits and letters, '_', and every byte with the high bit set.
*/
static const char isBoundary[] = {
  /* 0x00..0x0f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10..0x1f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20..0x2f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30..0x3f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
  /* 0x40..0x4f */  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50..0x5f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
  /* 0x60..0x6f */  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70..0x7f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
  /* 0x80..0x8f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90..0x9f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0..0xaf */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0..0xbf */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0..0xcf */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0..0xdf */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0..0xef */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0..0xff */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* True if byte x is part of a word.  The 0xff mask keeps a negative
** (signed) char from indexing before the start of the table. */
#define ISALNUM(x)  (!isBoundary[(x)&0xff])
97
98
99
/*
100
** Destroy a full-scan search context.
101
*/
102
void search_end(Search *p){
103
if( p ){
104
fossil_free(p->zPattern);
105
fossil_free(p->zMarkBegin);
106
fossil_free(p->zMarkEnd);
107
fossil_free(p->zMarkGap);
108
if( p->iScore ) blob_reset(&p->snip);
109
memset(p, 0, sizeof(*p));
110
if( p!=&gSearch ) fossil_free(p);
111
}
112
}
113
114
/*
115
** Compile a full-scan search pattern
116
*/
117
static Search *search_init(
118
const char *zPattern, /* The search pattern */
119
const char *zMarkBegin, /* Start of a match */
120
const char *zMarkEnd, /* End of a match */
121
const char *zMarkGap, /* A gap between two matches */
122
unsigned fSrchFlg /* Flags */
123
){
124
Search *p;
125
char *z;
126
int i;
127
128
if( fSrchFlg & SRCHFLG_STATIC ){
129
p = &gSearch;
130
search_end(p);
131
}else{
132
p = fossil_malloc(sizeof(*p));
133
memset(p, 0, sizeof(*p));
134
}
135
p->zPattern = z = mprintf("%s",zPattern);
136
p->zMarkBegin = mprintf("%s",zMarkBegin);
137
p->zMarkEnd = mprintf("%s",zMarkEnd);
138
p->zMarkGap = mprintf("%s",zMarkGap);
139
p->fSrchFlg = fSrchFlg;
140
blob_init(&p->snip, 0, 0);
141
while( *z && p->nTerm<SEARCH_MAX_TERM ){
142
while( *z && !ISALNUM(*z) ){ z++; }
143
if( *z==0 ) break;
144
p->a[p->nTerm].z = z;
145
for(i=1; ISALNUM(z[i]); i++){}
146
p->a[p->nTerm].n = i;
147
z += i;
148
p->nTerm++;
149
}
150
return p;
151
}
152
153
154
/*
155
** Append n bytes of text to snippet zTxt. Encode the text appropriately.
156
*/
157
static void snippet_text_append(
158
Search *p, /* The search context */
159
Blob *pSnip, /* Append to this snippet */
160
const char *zTxt, /* Text to append */
161
int n /* How many bytes to append */
162
){
163
if( n>0 ){
164
if( p->fSrchFlg & SRCHFLG_HTML ){
165
blob_appendf(pSnip, "%#h", n, zTxt);
166
}else{
167
blob_append(pSnip, zTxt, n);
168
}
169
}
170
}
171
172
/* This the core search engine for full-scan search.
173
**
174
** Compare a search pattern against one or more input strings which
175
** collectively comprise a document. Return a match score. Any
176
** postive value means there was a match. Zero means that one or
177
** more terms are missing.
178
**
179
** The score and a snippet are record for future use.
180
**
181
** Scoring:
182
** * All terms must match at least once or the score is zero
183
** * One point for each matching term
184
** * Extra points if consecutive words of the pattern are consecutive
185
** in the document
186
*/
187
static int search_match(
188
Search *p, /* Search pattern and flags */
189
int nDoc, /* Number of strings in this document */
190
const char **azDoc /* Text of each string */
191
){
192
int score; /* Final score */
193
int i; /* Offset into current document */
194
int ii; /* Loop counter */
195
int j; /* Loop over search terms */
196
int k; /* Loop over prior terms */
197
int iWord = 0; /* Current word number */
198
int iDoc; /* Current document number */
199
int wantGap = 0; /* True if a zMarkGap is wanted */
200
const char *zDoc; /* Current document text */
201
const int CTX = 50; /* Amount of snippet context */
202
int anMatch[SEARCH_MAX_TERM]; /* Number of terms in best match */
203
int aiBestDoc[SEARCH_MAX_TERM]; /* Document containing best match */
204
int aiBestOfst[SEARCH_MAX_TERM]; /* Byte offset to start of best match */
205
int aiLastDoc[SEARCH_MAX_TERM]; /* Document containing most recent match */
206
int aiLastOfst[SEARCH_MAX_TERM]; /* Byte offset to the most recent match */
207
int aiWordIdx[SEARCH_MAX_TERM]; /* Word index of most recent match */
208
209
memset(anMatch, 0, sizeof(anMatch));
210
memset(aiWordIdx, 0xff, sizeof(aiWordIdx));
211
for(iDoc=0; iDoc<nDoc; iDoc++){
212
zDoc = azDoc[iDoc];
213
if( zDoc==0 ) continue;
214
iWord++;
215
for(i=0; zDoc[i]; i++){
216
if( !ISALNUM(zDoc[i]) ) continue;
217
iWord++;
218
for(j=0; j<p->nTerm; j++){
219
int n = p->a[j].n;
220
if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0
221
&& (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*')
222
){
223
aiWordIdx[j] = iWord;
224
aiLastDoc[j] = iDoc;
225
aiLastOfst[j] = i;
226
for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){}
227
for(ii=0; ii<k; ii++){
228
if( anMatch[j-ii]<k ){
229
anMatch[j-ii] = k*(nDoc-iDoc);
230
aiBestDoc[j-ii] = aiLastDoc[j-ii];
231
aiBestOfst[j-ii] = aiLastOfst[j-ii];
232
}
233
}
234
break;
235
}
236
}
237
while( ISALNUM(zDoc[i]) ){ i++; }
238
if( zDoc[i]==0 ) break;
239
}
240
}
241
242
/* Finished search all documents.
243
** Every term must be seen or else the score is zero
244
*/
245
score = 1;
246
for(j=0; j<p->nTerm; j++) score *= anMatch[j];
247
blob_reset(&p->snip);
248
p->iScore = score;
249
if( score==0 ) return score;
250
251
252
/* Prepare a snippet that describes the matching text.
253
*/
254
while(1){
255
int iOfst;
256
int iTail;
257
int iBest;
258
for(ii=0; ii<p->nTerm && anMatch[ii]==0; ii++){}
259
if( ii>=p->nTerm ) break; /* This is where the loop exits */
260
iBest = ii;
261
iDoc = aiBestDoc[ii];
262
iOfst = aiBestOfst[ii];
263
for(; ii<p->nTerm; ii++){
264
if( anMatch[ii]==0 ) continue;
265
if( aiBestDoc[ii]>iDoc ) continue;
266
if( aiBestOfst[ii]>iOfst ) continue;
267
iDoc = aiBestDoc[ii];
268
iOfst = aiBestOfst[ii];
269
iBest = ii;
270
}
271
iTail = iOfst + p->a[iBest].n;
272
anMatch[iBest] = 0;
273
for(ii=0; ii<p->nTerm; ii++){
274
if( anMatch[ii]==0 ) continue;
275
if( aiBestDoc[ii]!=iDoc ) continue;
276
if( aiBestOfst[ii]<=iTail+CTX*2 ){
277
if( iTail<aiBestOfst[ii]+p->a[ii].n ){
278
iTail = aiBestOfst[ii]+p->a[ii].n;
279
}
280
anMatch[ii] = 0;
281
ii = -1;
282
continue;
283
}
284
}
285
zDoc = azDoc[iDoc];
286
iOfst -= CTX;
287
if( iOfst<0 ) iOfst = 0;
288
while( iOfst>0 && ISALNUM(zDoc[iOfst-1]) ) iOfst--;
289
while( zDoc[iOfst] && !ISALNUM(zDoc[iOfst]) ) iOfst++;
290
for(ii=0; ii<CTX && zDoc[iTail]; ii++, iTail++){}
291
while( ISALNUM(zDoc[iTail]) ) iTail++;
292
if( iOfst>0 || wantGap ) blob_append(&p->snip, p->zMarkGap, -1);
293
wantGap = zDoc[iTail]!=0;
294
zDoc += iOfst;
295
iTail -= iOfst;
296
297
/* Add a snippet segment using characters iOfst..iOfst+iTail from zDoc */
298
for(i=0; i<iTail; i++){
299
if( !ISALNUM(zDoc[i]) ) continue;
300
for(j=0; j<p->nTerm; j++){
301
int n = p->a[j].n;
302
if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0
303
&& (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*')
304
){
305
snippet_text_append(p, &p->snip, zDoc, i);
306
zDoc += i;
307
iTail -= i;
308
blob_append(&p->snip, p->zMarkBegin, -1);
309
if( p->a[j].z[n]=='*' ){
310
while( ISALNUM(zDoc[n]) ) n++;
311
}
312
snippet_text_append(p, &p->snip, zDoc, n);
313
zDoc += n;
314
iTail -= n;
315
blob_append(&p->snip, p->zMarkEnd, -1);
316
i = -1;
317
break;
318
} /* end-if */
319
} /* end for(j) */
320
if( j<p->nTerm ){
321
while( ISALNUM(zDoc[i]) && i<iTail ){ i++; }
322
}
323
} /* end for(i) */
324
snippet_text_append(p, &p->snip, zDoc, iTail);
325
}
326
if( wantGap ) blob_append(&p->snip, p->zMarkGap, -1);
327
return score;
328
}
329
330
/*
331
** COMMAND: test-match
332
**
333
** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ...
334
**
335
** Run the full-scan search algorithm using SEARCHSTRING against
336
** the text of the files listed. Output matches and snippets.
337
**
338
** Options:
339
** --begin TEXT Text to insert before each match
340
** --end TEXT Text to insert after each match
341
** --gap TEXT Text to indicate elided content
342
** --html Input is HTML
343
** --static Use the static Search object
344
*/
345
void test_match_cmd(void){
346
Search *p;
347
int i;
348
Blob x;
349
int score;
350
char *zDoc;
351
int flg = 0;
352
char *zBegin = (char*)find_option("begin",0,1);
353
char *zEnd = (char*)find_option("end",0,1);
354
char *zGap = (char*)find_option("gap",0,1);
355
if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML;
356
if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC;
357
verify_all_options();
358
if( g.argc<4 ) usage("SEARCHSTRING FILE1...");
359
if( zBegin==0 ) zBegin = "[[";
360
if( zEnd==0 ) zEnd = "]]";
361
if( zGap==0 ) zGap = " ... ";
362
p = search_init(g.argv[2], zBegin, zEnd, zGap, flg);
363
for(i=3; i<g.argc; i++){
364
blob_read_from_file(&x, g.argv[i], ExtFILE);
365
zDoc = blob_str(&x);
366
score = search_match(p, 1, (const char**)&zDoc);
367
fossil_print("%s: %d\n", g.argv[i], p->iScore);
368
blob_reset(&x);
369
if( score ){
370
fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&p->snip), '=');
371
}
372
}
373
search_end(p);
374
}
375
376
/*
377
** An SQL function to initialize the full-scan search pattern:
378
**
379
** search_init(PATTERN,BEGIN,END,GAP,FLAGS)
380
**
381
** All arguments are optional. PATTERN is the search pattern. If it
382
** is omitted, then the global search pattern is reset. BEGIN and END
383
** and GAP are the strings used to construct snippets. FLAGS is an
384
** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC,
385
** SRCH_TKT, SRCH_FORUM, or SRCH_ALL bits to determine what is to be
386
** searched.
387
*/
388
static void search_init_sqlfunc(
389
sqlite3_context *context,
390
int argc,
391
sqlite3_value **argv
392
){
393
const char *zPattern = 0;
394
const char *zBegin = "<mark>";
395
const char *zEnd = "</mark>";
396
const char *zGap = " ... ";
397
unsigned int flg = SRCHFLG_HTML;
398
switch( argc ){
399
default:
400
flg = (unsigned int)sqlite3_value_int(argv[4]);
401
case 4:
402
zGap = (const char*)sqlite3_value_text(argv[3]);
403
case 3:
404
zEnd = (const char*)sqlite3_value_text(argv[2]);
405
case 2:
406
zBegin = (const char*)sqlite3_value_text(argv[1]);
407
case 1:
408
zPattern = (const char*)sqlite3_value_text(argv[0]);
409
}
410
if( zPattern && zPattern[0] ){
411
search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC);
412
}else{
413
search_end(&gSearch);
414
}
415
}
416
417
/* search_match(TEXT, TEXT, ....)
418
**
419
** Using the full-scan search engine created by the most recent call
420
** to search_init(), match the input the TEXT arguments.
421
** Remember the results in the global full-scan search object.
422
** Return non-zero on a match and zero on a miss.
423
*/
424
static void search_match_sqlfunc(
425
sqlite3_context *context,
426
int argc,
427
sqlite3_value **argv
428
){
429
const char *azDoc[5];
430
int nDoc;
431
int rc;
432
for(nDoc=0; nDoc<count(azDoc) && nDoc<argc; nDoc++){
433
azDoc[nDoc] = (const char*)sqlite3_value_text(argv[nDoc]);
434
if( azDoc[nDoc]==0 ) azDoc[nDoc] = "";
435
}
436
rc = search_match(&gSearch, nDoc, azDoc);
437
sqlite3_result_int(context, rc);
438
}
439
440
441
/* search_score()
442
**
443
** Return the match score for the last successful search_match call.
444
*/
445
static void search_score_sqlfunc(
446
sqlite3_context *context,
447
int argc,
448
sqlite3_value **argv
449
){
450
sqlite3_result_int(context, gSearch.iScore);
451
}
452
453
/* search_snippet()
454
**
455
** Return a snippet for the last successful search_match() call.
456
*/
457
static void search_snippet_sqlfunc(
458
sqlite3_context *context,
459
int argc,
460
sqlite3_value **argv
461
){
462
if( blob_size(&gSearch.snip)>0 ){
463
sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free);
464
blob_init(&gSearch.snip, 0, 0);
465
}
466
}
467
468
/* stext(TYPE, RID, ARG)
469
**
470
** This is an SQLite function that computes the searchable text.
471
** It is a wrapper around the search_stext() routine. See the
472
** search_stext() routine for further detail.
473
*/
474
static void search_stext_sqlfunc(
475
sqlite3_context *context,
476
int argc,
477
sqlite3_value **argv
478
){
479
const char *zType = (const char*)sqlite3_value_text(argv[0]);
480
int rid = sqlite3_value_int(argv[1]);
481
const char *zName = (const char*)sqlite3_value_text(argv[2]);
482
sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1,
483
SQLITE_TRANSIENT);
484
}
485
486
/* title(TYPE, RID, ARG)
487
**
488
** Return the title of the document to be search.
489
*/
490
static void search_title_sqlfunc(
491
sqlite3_context *context,
492
int argc,
493
sqlite3_value **argv
494
){
495
const char *zType = (const char*)sqlite3_value_text(argv[0]);
496
int rid = sqlite3_value_int(argv[1]);
497
const char *zName = (const char*)sqlite3_value_text(argv[2]);
498
int nHdr = 0;
499
char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
500
if( nHdr || zType[0]!='d' ){
501
sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT);
502
}else{
503
sqlite3_result_value(context, argv[2]);
504
}
505
}
506
507
/* body(TYPE, RID, ARG)
508
**
509
** Return the body of the document to be search.
510
*/
511
static void search_body_sqlfunc(
512
sqlite3_context *context,
513
int argc,
514
sqlite3_value **argv
515
){
516
const char *zType = (const char*)sqlite3_value_text(argv[0]);
517
int rid = sqlite3_value_int(argv[1]);
518
const char *zName = (const char*)sqlite3_value_text(argv[2]);
519
int nHdr = 0;
520
char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
521
sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT);
522
}
523
524
/* urlencode(X)
525
**
526
** Encode a string for use as a query parameter in a URL. This is
527
** the equivalent of printf("%T",X).
528
*/
529
static void search_urlencode_sqlfunc(
530
sqlite3_context *context,
531
int argc,
532
sqlite3_value **argv
533
){
534
char *z = mprintf("%T",sqlite3_value_text(argv[0]));
535
sqlite3_result_text(context, z, -1, fossil_free);
536
}
537
538
/*
539
** Register the various SQL functions (defined above) needed to implement
540
** full-scan search.
541
*/
542
void search_sql_setup(sqlite3 *db){
543
static int once = 0;
544
static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
545
if( once++ ) return;
546
sqlite3_create_function(db, "search_match", -1, enc, 0,
547
search_match_sqlfunc, 0, 0);
548
sqlite3_create_function(db, "search_score", 0, enc, 0,
549
search_score_sqlfunc, 0, 0);
550
sqlite3_create_function(db, "search_snippet", 0, enc, 0,
551
search_snippet_sqlfunc, 0, 0);
552
sqlite3_create_function(db, "search_init", -1, enc, 0,
553
search_init_sqlfunc, 0, 0);
554
sqlite3_create_function(db, "stext", 3, enc, 0,
555
search_stext_sqlfunc, 0, 0);
556
sqlite3_create_function(db, "title", 3, enc, 0,
557
search_title_sqlfunc, 0, 0);
558
sqlite3_create_function(db, "body", 3, enc, 0,
559
search_body_sqlfunc, 0, 0);
560
sqlite3_create_function(db, "urlencode", 1, enc, 0,
561
search_urlencode_sqlfunc, 0, 0);
562
}
563
564
/*
565
** Testing the search function.
566
**
567
** COMMAND: search*
568
**
569
** Usage: %fossil search [OPTIONS] PATTERN...
570
**
571
** Search the repository for PATTERN and show matches. Depending on
572
** options and how the administrator has search configured for the
573
** repository, the search can cover:
574
**
575
** * check-in comments (-c)
576
** * embedded documentation (--docs)
577
** * forum posts (--forum)
578
** * tickets (--tickets)
579
** * tech notes (--technotes)
580
** * wiki pages (--wiki)
581
** * built-in fossil help text (-h)
582
** * all of the above (-a)
583
**
584
** Use options below to select the scope of the search. The
585
** default is check-in comments only (-c).
586
**
587
** Output is colorized if writing to a TTY and if the NO_COLOR environment
588
** variable is not set. Use the "--highlight 0" option to disable colorization
589
** or use "--highlight 91" to force it on. Change the argument to --highlight
590
** to change the color.
592
**
593
** Options:
594
** -a|--all Search everything
595
** -c|--checkins Search check-in comments
596
** --docs Search embedded documentation
597
** --forum Search forum posts
598
** --highlight N Use VT100 color N for matching text. 0 means "off".
599
** -n|--limit N Limit output to N matches
600
** --technotes Search tech notes
601
** --tickets Search tickets
602
** -W|--width WIDTH Set display width to WIDTH columns, 0 for
603
** unlimited. Defaults to the terminal's width.
604
** --wiki Search wiki
605
*/
606
void search_cmd(void){
607
Blob pattern;
608
int i;
609
Blob sql = empty_blob;
610
Stmt q;
611
int iBest;
612
int srchFlags = 0;
613
int bFts = 1; /* Use FTS search by default now */
614
char fAll = NULL != find_option("all", "a", 0);
615
const char *zLimit = find_option("limit","n",1);
616
const char *zScope = 0;
617
const char *zWidth = find_option("width","W",1);
618
int bDebug = find_option("debug",0,0)!=0; /* Undocumented */
619
int nLimit = zLimit ? atoi(zLimit) : -1000;
620
int width;
621
int nTty = 0; /* VT100 highlight color for matching text */
622
const char *zHighlight = 0;
623
int bFlags = 0; /* DB open flags */
624
625
nTty = terminal_is_vt100();
626
627
/* Undocumented option to change highlight color */
628
zHighlight = find_option("highlight",0,1);
629
if( zHighlight ) nTty = atoi(zHighlight);
630
631
/* Undocumented option (legacy) */
632
zScope = find_option("scope",0,1);
633
634
if( find_option("fts",0,0)!=0 ) bFts = 1; /* Undocumented legacy */
635
if( find_option("legacy",0,0)!=0 ) bFts = 0; /* Undocumented */
636
637
if( zWidth ){
638
width = atoi(zWidth);
639
if( (width!=0) && (width<=20) ){
640
fossil_fatal("-W|--width value must be >20 or 0");
641
}
642
}else{
643
width = -1;
644
}
645
if( zScope ){
646
for(i=0; zScope[i]; i++){
647
switch( zScope[i] ){
648
case 'a': srchFlags = SRCH_ALL; break;
649
case 'c': srchFlags |= SRCH_CKIN; break;
650
case 'd': srchFlags |= SRCH_DOC; break;
651
case 'e': srchFlags |= SRCH_TECHNOTE; break;
652
case 'f': srchFlags |= SRCH_FORUM; break;
653
case 'h': srchFlags |= SRCH_HELP; break;
654
case 't': srchFlags |= SRCH_TKT; break;
655
case 'w': srchFlags |= SRCH_WIKI; break;
656
}
657
}
658
bFts = 1;
659
}
660
if( find_option("all","a",0) ){ srchFlags |= SRCH_ALL; bFts = 1; }
661
if( find_option("bi-help","h",0) ){ srchFlags |= SRCH_HELP; bFts = 1; }
662
if( find_option("checkins","c",0) ){ srchFlags |= SRCH_CKIN; bFts = 1; }
663
if( find_option("docs",0,0) ){ srchFlags |= SRCH_DOC; bFts = 1; }
664
if( find_option("forum",0,0) ){ srchFlags |= SRCH_FORUM; bFts = 1; }
665
if( find_option("technotes",0,0) ){ srchFlags |= SRCH_TECHNOTE; bFts = 1; }
666
if( find_option("tickets",0,0) ){ srchFlags |= SRCH_TKT; bFts = 1; }
667
if( find_option("wiki",0,0) ){ srchFlags |= SRCH_WIKI; bFts = 1; }
668
669
/* If no search objects are specified, default to "check-in comments" */
670
if( srchFlags==0 ) srchFlags = SRCH_CKIN;
671
672
if( srchFlags==SRCH_HELP ) bFlags = OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE;
673
db_find_and_open_repository(bFlags, 0);
674
verify_all_options();
675
if( g.argc<3 ) return;
676
login_set_capabilities("s", 0);
677
if( search_restrict(srchFlags)==0 && (srchFlags & SRCH_HELP)==0 ){
678
const char *zC1 = 0, *zPlural = "s";
679
if( srchFlags & SRCH_TECHNOTE ){ zC1 = "technote"; }
680
if( srchFlags & SRCH_TKT ){ zC1 = "ticket"; }
681
if( srchFlags & SRCH_FORUM ){ zC1 = "forum"; zPlural = ""; }
682
if( srchFlags & SRCH_DOC ){ zC1 = "document"; }
683
if( srchFlags & SRCH_WIKI ){ zC1 = "wiki"; zPlural = ""; }
684
if( srchFlags & SRCH_CKIN ){ zC1 = "check-in"; }
685
fossil_print(
686
"Search of %s%s is disabled on this repository.\n"
687
"Enable using \"fossil fts-config enable %s\".\n",
688
zC1, zPlural, zC1
689
);
690
return;
691
}
692
693
blob_init(&pattern, g.argv[2], -1);
694
for(i=3; i<g.argc; i++){
695
blob_appendf(&pattern, " %s", g.argv[i]);
696
}
697
if( bFts ){
698
/* Search using FTS */
699
Blob com;
700
Blob snip;
701
const char *zPattern = blob_str(&pattern);
702
search_sql_setup(g.db);
703
add_content_sql_commands(g.db);
704
db_multi_exec(
705
"CREATE TEMP TABLE x(label,url,score,id,date,snip);"
706
);
707
if( !search_index_exists() ){
708
search_fullscan(zPattern, srchFlags); /* Full-scan search */
709
}else{
710
search_update_index(srchFlags); /* Update the index */
711
search_indexed(zPattern, srchFlags); /* Indexed search */
712
if( srchFlags & SRCH_HELP ){
713
search_fullscan(zPattern, SRCH_HELP);
714
}
715
}
716
db_prepare(&q, "SELECT snip, label, score, id, date"
717
" FROM x"
718
" ORDER BY score DESC, date DESC;");
719
blob_init(&com, 0, 0);
720
blob_init(&snip, 0, 0);
721
if( width<0 ) width = terminal_get_width(80);
722
while( db_step(&q)==SQLITE_ROW ){
723
const char *zSnippet = db_column_text(&q, 0);
724
const char *zLabel = db_column_text(&q, 1);
725
const char *zDate = db_column_text(&q, 4);
726
const char *zScore = db_column_text(&q, 2);
727
const char *zId = db_column_text(&q, 3);
728
char *zOrig;
729
blob_appendf(&snip, "%s", zSnippet);
730
zOrig = blob_materialize(&snip);
731
blob_init(&snip, 0, 0);
732
html_to_plaintext(zOrig, &snip, (nTty?HTOT_VT100:0)|HTOT_FLOW|HTOT_TRIM);
733
fossil_free(zOrig);
734
blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
735
if( bDebug ){
736
blob_appendf(&com," score: %s id: %s", zScore, zId);
737
}
738
comment_print(blob_str(&com), 0, 5, width,
739
COMMENT_PRINT_TRIM_CRLF |
740
COMMENT_PRINT_WORD_BREAK |
741
COMMENT_PRINT_TRIM_SPACE);
742
blob_reset(&com);
743
blob_reset(&snip);
744
if( nLimit>=1 ){
745
nLimit--;
746
if( nLimit==0 ) break;
747
}
748
}
749
db_finalize(&q);
750
blob_reset(&pattern);
751
}else{
752
/* Legacy timeline search (the default) */
753
(void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC);
754
blob_reset(&pattern);
755
search_sql_setup(g.db);
756
757
db_multi_exec(
758
"CREATE TEMP TABLE srch(rid,uuid,date,comment,x);"
759
"CREATE INDEX srch_idx1 ON srch(x);"
760
"INSERT INTO srch(rid,uuid,date,comment,x)"
761
" SELECT blob.rid, uuid, datetime(event.mtime,toLocal()),"
762
" coalesce(ecomment,comment),"
763
" search_score()"
764
" FROM event, blob"
765
" WHERE blob.rid=event.objid"
766
" AND search_match(coalesce(ecomment,comment));"
767
);
768
iBest = db_int(0, "SELECT max(x) FROM srch");
769
blob_append(&sql,
770
"SELECT rid, uuid, date, comment, 0, 0 FROM srch "
771
"WHERE 1 ", -1);
772
if(!fAll){
773
blob_append_sql(&sql,"AND x>%d ", iBest/3);
774
}
775
blob_append(&sql, "ORDER BY x DESC, date DESC ", -1);
776
db_prepare(&q, "%s", blob_sql_text(&sql));
777
blob_reset(&sql);
778
print_timeline(&q, nLimit, width, 0, 0);
779
db_finalize(&q);
780
}
781
}
782
783
#if INTERFACE
/* Bit mask values naming the kinds of content a search can cover */
#define SRCH_CKIN     0x0001    /* Check-in comments */
#define SRCH_DOC      0x0002    /* Embedded documents */
#define SRCH_TKT      0x0004    /* Tickets */
#define SRCH_WIKI     0x0008    /* Wiki pages */
#define SRCH_TECHNOTE 0x0010    /* Tech notes */
#define SRCH_FORUM    0x0020    /* Forum messages */
#define SRCH_HELP     0x0040    /* Built-in help (full-scan only) */
#define SRCH_ALL      0x007f    /* All of the above */
#endif
794
795
/*
796
** Remove bits from srchFlags which are disallowed by either the
797
** current server configuration or by user permissions. Return
798
** the revised search flags mask.
799
**
800
** If bFlex is true, that means allow through the SRCH_HELP option
801
** even if it is not explicitly enabled.
802
*/
803
unsigned int search_restrict(unsigned int srchFlags){
804
static unsigned int knownGood = 0;
805
static unsigned int knownBad = 0;
806
static const struct { unsigned m; const char *zKey; } aSetng[] = {
807
{ SRCH_CKIN, "search-ci" },
808
{ SRCH_DOC, "search-doc" },
809
{ SRCH_TKT, "search-tkt" },
810
{ SRCH_WIKI, "search-wiki" },
811
{ SRCH_TECHNOTE, "search-technote" },
812
{ SRCH_FORUM, "search-forum" },
813
{ SRCH_HELP, "search-help" },
814
};
815
int i;
816
if( g.perm.Read==0 ) srchFlags &= ~(SRCH_CKIN|SRCH_DOC|SRCH_TECHNOTE);
817
if( g.perm.RdTkt==0 ) srchFlags &= ~(SRCH_TKT);
818
if( g.perm.RdWiki==0 ) srchFlags &= ~(SRCH_WIKI);
819
if( g.perm.RdForum==0) srchFlags &= ~(SRCH_FORUM);
820
for(i=0; i<count(aSetng); i++){
821
unsigned int m = aSetng[i].m;
822
if( (srchFlags & m)==0 ) continue;
823
if( ((knownGood|knownBad) & m)!=0 ) continue;
824
if( db_get_boolean(aSetng[i].zKey,0) ){
825
knownGood |= m;
826
}else{
827
knownBad |= m;
828
}
829
}
830
return srchFlags & ~knownBad;
831
}
832
833
/*
834
** When this routine is called, there already exists a table
835
**
836
** x(label,url,score,id,snip).
837
**
838
** label: The "name" of the document containing the match
839
** url: A URL for the document
840
** score: How well the document matched
841
** id: The document id. Format: xNNNNN, x: type, N: number
842
** snip: A snippet for the match
843
**
844
** And the srchFlags parameter has been validated. This routine
845
** fills the X table with search results using a full-scan search.
846
**
847
** The companion indexed search routine is search_indexed().
848
*/
849
LOCAL void search_fullscan(
850
const char *zPattern, /* The query pattern */
851
unsigned int srchFlags /* What to search over */
852
){
853
search_init(zPattern, "<mark>", "</mark>", " ... ",
854
SRCHFLG_STATIC|SRCHFLG_HTML);
855
if( (srchFlags & SRCH_DOC)!=0 ){
856
char *zDocGlob = db_get("doc-glob","");
857
const char *zMainBranch = db_main_branch();
858
char *zDocBr = db_get("doc-branch", zMainBranch);
859
if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){
860
Glob * pGlob = glob_create(zDocBr)
861
/* We're misusing a Glob as a list of comma-/space-delimited
862
** tokens. We're not actually doing glob matches here. */;
863
int i;
864
db_multi_exec(
865
"CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
866
);
867
for( i = 0; i < pGlob->nPattern; ++i ){
868
const char * zBranch = pGlob->azPattern[i];
869
db_multi_exec(
870
"INSERT INTO x(label,url,score,id,date,snip)"
871
" SELECT printf('Document: %%s',title('d',blob.rid,foci.filename)),"
872
" printf('/doc/%T/%%s',foci.filename),"
873
" search_score(),"
874
" 'd'||blob.rid,"
875
" (SELECT datetime(event.mtime) FROM event"
876
" WHERE objid=symbolic_name_to_rid(%Q)),"
877
" search_snippet()"
878
" FROM foci CROSS JOIN blob"
879
" WHERE checkinID=symbolic_name_to_rid(%Q)"
880
" AND blob.uuid=foci.uuid"
881
" AND search_match(title('d',blob.rid,foci.filename),"
882
" body('d',blob.rid,foci.filename))"
883
" AND %z",
884
zBranch, zBranch, zBranch, glob_expr("foci.filename", zDocGlob)
885
);
886
}
887
glob_free(pGlob);
888
}
889
fossil_free(zDocGlob);
890
fossil_free(zDocBr);
891
}
892
if( (srchFlags & SRCH_WIKI)!=0 ){
893
db_multi_exec(
894
"WITH wiki(name,rid,mtime) AS ("
895
" SELECT substr(tagname,6), tagxref.rid, max(tagxref.mtime)"
896
" FROM tag, tagxref"
897
" WHERE tag.tagname GLOB 'wiki-*'"
898
" AND tagxref.tagid=tag.tagid"
899
" GROUP BY 1"
900
")"
901
"INSERT INTO x(label,url,score,id,date,snip)"
902
" SELECT printf('Wiki: %%s',name),"
903
" printf('/wiki?name=%%s',urlencode(name)),"
904
" search_score(),"
905
" 'w'||rid,"
906
" datetime(mtime),"
907
" search_snippet()"
908
" FROM wiki"
909
" WHERE search_match(title('w',rid,name),body('w',rid,name));"
910
);
911
}
912
if( (srchFlags & SRCH_CKIN)!=0 ){
913
db_multi_exec(
914
"WITH ckin(uuid,rid,mtime) AS ("
915
" SELECT blob.uuid, event.objid, event.mtime"
916
" FROM event, blob"
917
" WHERE event.type='ci'"
918
" AND blob.rid=event.objid"
919
")"
920
"INSERT INTO x(label,url,score,id,date,snip)"
921
" SELECT printf('Check-in [%%.10s] on %%s',uuid,datetime(mtime)),"
922
" printf('/timeline?c=%%s',uuid),"
923
" search_score(),"
924
" 'c'||rid,"
925
" datetime(mtime),"
926
" search_snippet()"
927
" FROM ckin"
928
" WHERE search_match('',body('c',rid,NULL));"
929
);
930
}
931
if( (srchFlags & SRCH_TKT)!=0 ){
932
db_multi_exec(
933
"INSERT INTO x(label,url,score,id,date,snip)"
934
" SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,NULL),"
935
"datetime(tkt_mtime)),"
936
" printf('/tktview/%%.20s',tkt_uuid),"
937
" search_score(),"
938
" 't'||tkt_id,"
939
" datetime(tkt_mtime),"
940
" search_snippet()"
941
" FROM ticket"
942
" WHERE search_match(title('t',tkt_id,NULL),body('t',tkt_id,NULL));"
943
);
944
}
945
if( (srchFlags & SRCH_TECHNOTE)!=0 ){
946
db_multi_exec(
947
"WITH technote(uuid,rid,mtime) AS ("
948
" SELECT substr(tagname,7), tagxref.rid, max(tagxref.mtime)"
949
" FROM tag, tagxref"
950
" WHERE tag.tagname GLOB 'event-*'"
951
" AND tagxref.tagid=tag.tagid"
952
" GROUP BY 1"
953
")"
954
"INSERT INTO x(label,url,score,id,date,snip)"
955
" SELECT printf('Tech Note: %%s',uuid),"
956
" printf('/technote/%%s',uuid),"
957
" search_score(),"
958
" 'e'||rid,"
959
" datetime(mtime),"
960
" search_snippet()"
961
" FROM technote"
962
" WHERE search_match('',body('e',rid,NULL));"
963
);
964
}
965
if( (srchFlags & SRCH_FORUM)!=0 ){
966
db_multi_exec(
967
"INSERT INTO x(label,url,score,id,date,snip)"
968
" SELECT 'Forum '||comment,"
969
" '/forumpost/'||uuid,"
970
" search_score(),"
971
" 'f'||rid,"
972
" datetime(event.mtime),"
973
" search_snippet()"
974
" FROM event JOIN blob on event.objid=blob.rid"
975
" WHERE search_match('',body('f',rid,NULL));"
976
);
977
}
978
if( (srchFlags & SRCH_HELP)!=0 ){
979
const char *zPrefix;
980
helptext_vtab_register(g.db);
981
if( srchFlags==SRCH_HELP ){
982
zPrefix = "The";
983
}else{
984
zPrefix = "Built-in help for the";
985
}
986
db_multi_exec(
987
"INSERT INTO x(label,url,score,id,snip)"
988
" SELECT format('%q \"%%s\" %%s',name,type),"
989
" '/help/'||name,"
990
" search_score(),"
991
" 'h'||rowid,"
992
" search_snippet()"
993
" FROM helptext"
994
" WHERE search_match(format('the \"%%s\" %%s',name,type),"
995
" helptext.helptext);",
996
zPrefix
997
);
998
}
999
}
1000
1001
/*
1002
** Number of significant bits in a u32
1003
*/
1004
static int nbits(u32 x){
1005
int n = 0;
1006
while( x ){ n++; x >>= 1; }
1007
return n;
1008
}
1009
1010
/*
1011
** Implemenation of the rank() function used with rank(matchinfo(*,'pcsx')).
1012
*/
1013
static void search_rank_sqlfunc(
1014
sqlite3_context *context,
1015
int argc,
1016
sqlite3_value **argv
1017
){
1018
const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
1019
int nVal = sqlite3_value_bytes(argv[0])/4;
1020
int nCol; /* Number of columns in the index */
1021
int nTerm; /* Number of search terms in the query */
1022
int i, j; /* Loop counter */
1023
double r = 0.0; /* Score */
1024
const unsigned *aX, *aS;
1025
1026
if( nVal<2 ) return;
1027
nTerm = aVal[0];
1028
nCol = aVal[1];
1029
if( nVal<2+3*nCol*nTerm+nCol ) return;
1030
aS = aVal+2;
1031
aX = aS+nCol;
1032
for(j=0; j<nCol; j++){
1033
double x;
1034
if( aS[j]>0 ){
1035
x = 0.0;
1036
for(i=0; i<nTerm; i++){
1037
int hits_this_row;
1038
int hits_all_rows;
1039
int rows_with_hit;
1040
double avg_hits_per_row;
1041
1042
hits_this_row = aX[j + i*nCol*3];
1043
if( hits_this_row==0 )continue;
1044
hits_all_rows = aX[j + i*nCol*3 + 1];
1045
rows_with_hit = aX[j + i*nCol*3 + 2];
1046
if( rows_with_hit==0 ) continue;
1047
avg_hits_per_row = hits_all_rows/(double)rows_with_hit;
1048
x += hits_this_row/(avg_hits_per_row*nbits(rows_with_hit));
1049
}
1050
x *= (1<<((30*(aS[j]-1))/nTerm));
1051
}else{
1052
x = 0.0;
1053
}
1054
r = r*10.0 + x;
1055
}
1056
#define SEARCH_DEBUG_RANK 0
1057
#if SEARCH_DEBUG_RANK
1058
{
1059
Blob x;
1060
blob_init(&x,0,0);
1061
blob_appendf(&x,"%08x", (int)r);
1062
for(i=0; i<nVal; i++){
1063
blob_appendf(&x," %d", aVal[i]);
1064
}
1065
blob_appendf(&x," r=%g", r);
1066
sqlite3_result_text(context, blob_str(&x), -1, fossil_free);
1067
}
1068
#else
1069
sqlite3_result_double(context, r);
1070
#endif
1071
}
1072
1073
/*
** Return a simplified copy of the search pattern zPattern.  In the copy,
** every ASCII character that is not alphanumeric becomes a space and
** every upper-case ASCII letter becomes lower-case.  Bytes with the high
** bit set are left alone.  The intent is to avoid causing errors in
** FTS5 searches with inputs which contain AND, OR, and symbols like #.
**
** If nothing but spaces remains, the two-character string "" (a pair of
** double-quotes) is returned instead.
**
** The caller is responsible for passing the result to fossil_free().
*/
char *search_simplify_pattern(const char * zPattern){
  char *zCopy = mprintf("%s",zPattern);
  int hasText = 0;        /* True once a non-space character is kept */
  int k;
  for(k=0; zCopy[k]; k++){
    char c = zCopy[k];
    if( (c&0x80)==0 && !fossil_isalnum(c) ) c = ' ';
    if( fossil_isupper(c) ) c = fossil_tolower(c);
    zCopy[k] = c;
    if( c!=' ' ) hasText = 1;
  }
  if( !hasText ){
    fossil_free(zCopy);
    zCopy = mprintf("\"\"");
  }
  return zCopy;
}
1095
1096
/*
1097
** When this routine is called, there already exists a table
1098
**
1099
** x(label,url,score,id,snip).
1100
**
1101
** label: The "name" of the document containing the match
1102
** url: A URL for the document
1103
** score: How well the document matched
1104
** id: The document id. Format: xNNNNN, x: type, N: number
1105
** snip: A snippet for the match
1106
**
1107
** And the srchFlags parameter has been validated. This routine
1108
** fills the X table with search results using FTS indexed search.
1109
**
1110
** The companion full-scan search routine is search_fullscan().
1111
*/
1112
LOCAL void search_indexed(
1113
const char *zPattern, /* The query pattern */
1114
unsigned int srchFlags /* What to search over */
1115
){
1116
Blob sql;
1117
char *zPat;
1118
static const char *zSnippetCall;
1119
if( srchFlags==0 ) return;
1120
sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
1121
search_rank_sqlfunc, 0, 0);
1122
zPat = search_simplify_pattern(zPattern);
1123
blob_init(&sql, 0, 0);
1124
if( search_index_type(0)==4 ){
1125
/* If this repo is still using the legacy FTS5 search index, then
1126
** the snippet() function is slightly different */
1127
zSnippetCall = "snippet(ftsidx,'<mark>','</mark>',' ... ',-1,35)";
1128
}else{
1129
/* This is the common case - Using newer FTS5 search index */
1130
zSnippetCall = "snippet(ftsidx,-1,'<mark>','</mark>',' ... ',35)";
1131
}
1132
blob_appendf(&sql,
1133
"INSERT INTO x(label,url,score,id,date,snip) "
1134
" SELECT ftsdocs.label,"
1135
" ftsdocs.url,"
1136
" rank(matchinfo(ftsidx,'pcsx')),"
1137
" ftsdocs.type || ftsdocs.rid,"
1138
" datetime(ftsdocs.mtime),"
1139
" %s"
1140
" FROM ftsidx CROSS JOIN ftsdocs"
1141
" WHERE ftsidx MATCH %Q"
1142
" AND ftsdocs.rowid=ftsidx.rowid",
1143
zSnippetCall /*safe-for-%s*/, zPat
1144
);
1145
fossil_free(zPat);
1146
if( srchFlags!=SRCH_ALL ){
1147
const char *zSep = " AND (";
1148
static const struct { unsigned m; char c; } aMask[] = {
1149
{ SRCH_CKIN, 'c' },
1150
{ SRCH_DOC, 'd' },
1151
{ SRCH_TKT, 't' },
1152
{ SRCH_WIKI, 'w' },
1153
{ SRCH_TECHNOTE, 'e' },
1154
{ SRCH_FORUM, 'f' },
1155
{ SRCH_HELP, 'h' },
1156
};
1157
int i;
1158
for(i=0; i<count(aMask); i++){
1159
if( srchFlags & aMask[i].m ){
1160
blob_appendf(&sql, "%sftsdocs.type='%c'", zSep, aMask[i].c);
1161
zSep = " OR ";
1162
}
1163
}
1164
blob_append(&sql,")",1);
1165
}
1166
db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/);
1167
#if SEARCH_DEBUG_RANK
1168
db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
1169
#endif
1170
}
1171
1172
/*
** If z[] begins with "<mark>TEXT</mark>" where TEXT consists only of
** alphanumeric characters (possibly none), return the total length of
** that mark in bytes, tags included.  Otherwise return 0.
*/
static int isSnippetMark(const char *z){
  const char *zEnd;
  if( strncmp(z,"<mark>",6)!=0 ) return 0;
  zEnd = z + 6;
  while( fossil_isalnum(zEnd[0]) ){
    zEnd++;
  }
  if( strncmp(zEnd,"</mark>",7)!=0 ) return 0;
  return (int)(zEnd - z) + 7;
}
1184
1185
/*
** Return a copy of zSnip (in memory obtained from fossil_malloc()) in
** which every "<" character is converted into "&lt;", except for those
** belonging to a <mark>TEXT</mark> span recognized by isSnippetMark().
** Such spans are copied through unchanged.  This is similar to htmlize()
** except that the marks are preserved.
**
** A NULL zSnip is treated as an empty string.
*/
static char *cleanSnippet(const char *zSnip){
  const char *zIn;       /* Read cursor */
  char *zOut;            /* Result buffer */
  int nByte = 0;         /* Length of the input */
  int nLt = 0;           /* Number of '<' characters in the input */
  int iOut = 0;          /* Write index into zOut[] */
  if( zSnip==0 ) zSnip = "";
  for(zIn=zSnip; zIn[0]; zIn++){
    nByte++;
    if( zIn[0]=='<' ) nLt++;
  }
  zOut = fossil_malloc( nByte+nLt*4+1 );
  zIn = zSnip;
  while( zIn[0] ){
    int nMark;
    if( zIn[0]!='<' ){
      zOut[iOut++] = zIn[0];
      zIn++;
      continue;
    }
    nMark = isSnippetMark(zIn);
    if( nMark==0 ){
      memcpy(&zOut[iOut], "&lt;", 4);
      iOut += 4;
      zIn++;
    }else{
      memcpy(&zOut[iOut], zIn, nMark);
      iOut += nMark;
      zIn += nMark;
    }
  }
  zOut[iOut] = 0;
  return zOut;
}
1220
1221
1222
/*
1223
** This routine generates web-page output for a search operation.
1224
** Other web-pages can invoke this routine to add search results
1225
** in the middle of the page.
1226
**
1227
** This routine works for both full-scan and indexed search. The
1228
** appropriate low-level search routine is called according to the
1229
** current configuration.
1230
**
1231
** Return the number of rows.
1232
*/
1233
int search_run_and_output(
1234
const char *zPattern, /* The query pattern */
1235
unsigned int srchFlags, /* What to search over */
1236
int fDebug /* Extra debugging output */
1237
){
1238
Stmt q;
1239
int nRow = 0;
1240
int nLimit = db_get_int("search-limit", 100);
1241
1242
if( P("searchlimit")!=0 ){
1243
nLimit = atoi(P("searchlimit"));
1244
}
1245
srchFlags = search_restrict(srchFlags) | (srchFlags & SRCH_HELP);
1246
if( srchFlags==0 ) return 0;
1247
search_sql_setup(g.db);
1248
add_content_sql_commands(g.db);
1249
db_multi_exec(
1250
"CREATE TEMP TABLE x(label,url,score,id,date,snip);"
1251
);
1252
if( !search_index_exists() ){
1253
search_fullscan(zPattern, srchFlags); /* Full-scan search */
1254
}else{
1255
search_update_index(srchFlags); /* Update the index, if necessary */
1256
search_indexed(zPattern, srchFlags); /* Indexed search */
1257
if( srchFlags & SRCH_HELP ){
1258
search_fullscan(zPattern, SRCH_HELP);
1259
}
1260
}
1261
db_prepare(&q, "SELECT url, snip, label, score, id, substr(date,1,10)"
1262
" FROM x"
1263
" ORDER BY score DESC, date DESC;");
1264
while( db_step(&q)==SQLITE_ROW ){
1265
const char *zUrl = db_column_text(&q, 0);
1266
const char *zSnippet = db_column_text(&q, 1);
1267
const char *zLabel = db_column_text(&q, 2);
1268
const char *zDate = db_column_text(&q, 5);
1269
if( nRow==0 ){
1270
@ <ol>
1271
}
1272
nRow++;
1273
@ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a>
1274
if( fDebug ){
1275
@ (%e(db_column_double(&q,3)), %s(db_column_text(&q,4))
1276
}
1277
@ <br><span class='snippet'>%z(cleanSnippet(zSnippet)) \
1278
if( zLabel && zDate && zDate[0] && strstr(zLabel,zDate)==0 ){
1279
@ <small>(%h(zDate))</small>
1280
}
1281
@ </span></li>
1282
if( nLimit && nRow>=nLimit ) break;
1283
}
1284
db_finalize(&q);
1285
if( nRow ){
1286
@ </ol>
1287
}
1288
return nRow;
1289
}
1290
1291
/*
1292
** Generate some HTML for doing search. At a minimum include the
1293
** Search-Text entry form. If the "s" query parameter is present, also
1294
** show search results.
1295
**
1296
** The srchFlags parameter restricts the set of documents to be searched.
1297
** srchFlags should normally be either a single search category or all
1298
** categories. Any srchFlags with two or more bits set
1299
** is treated like SRCH_ALL for display purposes.
1300
**
1301
** This routine automatically restricts srchFlag according to user
1302
** permissions and the server configuration. The entry box is shown
1303
** disabled if srchFlags is 0 after these restrictions are applied.
1304
**
1305
** The mFlags value controls options:
1306
**
1307
** 0x01 If the y= query parameter is present, use it as an addition
1308
** restriction what to search.
1309
**
1310
** 0x02 Show nothing if search is disabled.
1311
**
1312
** Return true if there are search results.
1313
*/
1314
int search_screen(unsigned srchAllowed, int mFlags){
1315
const char *zType = 0;
1316
const char *zClass = 0;
1317
const char *zDisable1;
1318
const char *zDisable2;
1319
const char *zPattern;
1320
int fDebug = PB("debug");
1321
int haveResult = 0;
1322
int srchThisTime;
1323
const char *zY = PD("y","all");
1324
if( zY[0]=='h' && zY[1]==0 ){
1325
srchAllowed = search_restrict(srchAllowed) | (srchAllowed & SRCH_HELP);
1326
}else{
1327
srchAllowed = search_restrict(srchAllowed);
1328
}
1329
switch( srchAllowed ){
1330
case SRCH_CKIN: zType = " Check-ins"; zClass = "Ckin"; break;
1331
case SRCH_DOC: zType = " Docs"; zClass = "Doc"; break;
1332
case SRCH_TKT: zType = " Tickets"; zClass = "Tkt"; break;
1333
case SRCH_WIKI: zType = " Wiki"; zClass = "Wiki"; break;
1334
case SRCH_TECHNOTE: zType = " Tech Notes"; zClass = "Note"; break;
1335
case SRCH_FORUM: zType = " Forum"; zClass = "Frm"; break;
1336
case SRCH_HELP: zType = " Help"; zClass = "Hlp"; break;
1337
}
1338
if( srchAllowed==0 ){
1339
if( mFlags & 0x02 ) return 0;
1340
zDisable1 = " disabled";
1341
zDisable2 = " disabled";
1342
zPattern = "";
1343
}else{
1344
zDisable1 = ""; /* Was: " autofocus" */
1345
zDisable2 = "";
1346
zPattern = PD("s","");
1347
}
1348
@ <form method='GET' action='%R/%T(g.zPath)'>
1349
if( zClass ){
1350
@ <div class='searchForm searchForm%s(zClass)'>
1351
}else{
1352
@ <div class='searchForm'>
1353
}
1354
@ <input type="text" name="s" size="40" value="%h(zPattern)"%s(zDisable1)>
1355
srchThisTime = srchAllowed;
1356
if( (mFlags & 0x01)!=0 && (srchAllowed & (srchAllowed-1))!=0 ){
1357
static const struct {
1358
const char *z;
1359
const char *zNm;
1360
unsigned m;
1361
} aY[] = {
1362
{ "all", "All", SRCH_ALL },
1363
{ "c", "Check-ins", SRCH_CKIN },
1364
{ "d", "Docs", SRCH_DOC },
1365
{ "t", "Tickets", SRCH_TKT },
1366
{ "w", "Wiki", SRCH_WIKI },
1367
{ "e", "Tech Notes", SRCH_TECHNOTE },
1368
{ "f", "Forum", SRCH_FORUM },
1369
{ "h", "Help", SRCH_HELP },
1370
};
1371
int i;
1372
@ <select size='1' name='y'>
1373
for(i=0; i<count(aY); i++){
1374
if( (aY[i].m & srchAllowed)==0 ) continue;
1375
if( aY[i].m==SRCH_HELP && fossil_strcmp(zY,"h")!=0
1376
&& search_restrict(SRCH_HELP)==0 ) continue;
1377
cgi_printf("<option value='%s'", aY[i].z);
1378
if( fossil_strcmp(zY,aY[i].z)==0 ){
1379
srchThisTime &= aY[i].m;
1380
cgi_printf(" selected");
1381
}
1382
cgi_printf(">%s</option>\n", aY[i].zNm);
1383
}
1384
@ </select>
1385
}
1386
if( fDebug ){
1387
@ <input type="hidden" name="debug" value="1">
1388
}
1389
@ <input type="submit" value="Search%s(zType)"%s(zDisable2)>
1390
if( srchAllowed==0 && srchThisTime==0 ){
1391
@ <p class="generalError">Search is disabled</p>
1392
}
1393
@ </div></form>
1394
while( fossil_isspace(zPattern[0]) ) zPattern++;
1395
if( zPattern[0] ){
1396
if( zClass ){
1397
@ <div class='searchResult searchResult%s(zClass)'>
1398
}else{
1399
@ <div class='searchResult'>
1400
}
1401
if( search_run_and_output(zPattern, srchThisTime, fDebug)==0 ){
1402
@ <p class='searchEmpty'>No matches for: <span>%h(zPattern)</span></p>
1403
}
1404
@ </div>
1405
haveResult = 1;
1406
}
1407
return haveResult;
1408
}
1409
1410
/*
1411
** WEBPAGE: search
1412
**
1413
** Search for check-in comments, documents, tickets, or wiki that
1414
** match a user-supplied pattern.
1415
**
1416
** s=PATTERN Specify the full-text pattern to search for
1417
** y=TYPE What to search.
1418
** c -> check-ins,
1419
** d -> documentation,
1420
** t -> tickets,
1421
** w -> wiki,
1422
** e -> tech notes,
1423
** f -> forum,
1424
** h -> built-in help,
1425
** all -> everything.
1426
*/
1427
void search_page(void){
1428
const int isSearch = P("s")!=0;
1429
login_check_credentials();
1430
style_header("Search%s", isSearch ? " Results" : "");
1431
cgi_check_for_malice();
1432
search_screen(SRCH_ALL, 1);
1433
style_finish_page();
1434
}
1435
1436
1437
/*
1438
** This is a helper function for search_stext(). Writing into pOut
1439
** the search text obtained from pIn according to zMimetype.
1440
**
1441
** If a title is not specified in zTitle (e.g. for wiki pages that do not
1442
** include the title in the body), it is determined from the page content.
1443
**
1444
** The title of the document is the first line of text. All subsequent
1445
** lines are the body. If the document has no title, the first line
1446
** is blank.
1447
*/
1448
static void get_stext_by_mimetype(
1449
Blob *pIn,
1450
const char *zMimetype,
1451
const char *zTitle,
1452
Blob *pOut
1453
){
1454
Blob html, title;
1455
Blob *pHtml = &html;
1456
blob_init(&html, 0, 0);
1457
if( zTitle==0 ){
1458
blob_init(&title, 0, 0);
1459
}else{
1460
blob_init(&title, zTitle, -1);
1461
}
1462
if( zMimetype==0 ) zMimetype = "text/plain";
1463
if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){
1464
if( blob_size(&title) ){
1465
wiki_convert(pIn, &html, 0);
1466
}else{
1467
Blob tail;
1468
blob_init(&tail, 0, 0);
1469
if( wiki_find_title(pIn, &title, &tail) ){
1470
blob_appendf(pOut, "%s\n", blob_str(&title));
1471
wiki_convert(&tail, &html, 0);
1472
blob_reset(&tail);
1473
}else{
1474
blob_append(pOut, "\n", 1);
1475
wiki_convert(pIn, &html, 0);
1476
}
1477
}
1478
html_to_plaintext(blob_str(&html), pOut, 0);
1479
}else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){
1480
markdown_to_html(pIn, blob_size(&title) ? NULL : &title, &html);
1481
}else if( fossil_strcmp(zMimetype,"text/html")==0 ){
1482
if( blob_size(&title)==0 ) doc_is_embedded_html(pIn, &title);
1483
pHtml = pIn;
1484
}
1485
blob_appendf(pOut, "%s\n", blob_str(&title));
1486
if( blob_size(pHtml) ){
1487
html_to_plaintext(blob_str(pHtml), pOut, 0);
1488
}else{
1489
blob_append(pOut, blob_buffer(pIn), blob_size(pIn));
1490
}
1491
blob_reset(&html);
1492
blob_reset(&title);
1493
}
1494
1495
/*
1496
** Query pQuery is pointing at a single row of output. Append a text
1497
** representation of every text-compatible column to pAccum.
1498
*/
1499
static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery, int iTitle){
1500
int n = db_column_count(pQuery);
1501
int i;
1502
const char *zMime = 0;
1503
if( iTitle>=0 && iTitle<n ){
1504
if( db_column_type(pQuery,iTitle)==SQLITE_TEXT ){
1505
blob_append(pAccum, db_column_text(pQuery,iTitle), -1);
1506
}
1507
blob_append(pAccum, "\n", 1);
1508
}
1509
for(i=0; i<n; i++){
1510
const char *zColName = db_column_name(pQuery,i);
1511
int eType = db_column_type(pQuery,i);
1512
if( i==iTitle ) continue;
1513
if( fossil_strnicmp(zColName,"tkt_",4)==0 ) continue;
1514
if( fossil_strnicmp(zColName,"private_",8)==0 ) continue;
1515
if( eType==SQLITE_BLOB || eType==SQLITE_NULL ) continue;
1516
if( fossil_stricmp(zColName,"mimetype")==0 ){
1517
zMime = db_column_text(pQuery,i);
1518
if( fossil_strcmp(zMime,"text/plain")==0 ) zMime = 0;
1519
}else if( zMime==0 || eType!=SQLITE_TEXT ){
1520
blob_appendf(pAccum, "%s: %s |\n", zColName, db_column_text(pQuery,i));
1521
}else{
1522
Blob txt;
1523
blob_init(&txt, db_column_text(pQuery,i), -1);
1524
blob_appendf(pAccum, "%s: ", zColName);
1525
get_stext_by_mimetype(&txt, zMime, NULL, pAccum);
1526
blob_append(pAccum, " |", 2);
1527
blob_reset(&txt);
1528
}
1529
}
1530
}
1531
1532
1533
/*
1534
** Return "search text" - a reduced version of a document appropriate for
1535
** full text search and/or for constructing a search result snippet.
1536
**
1537
** cType: d Embedded documentation
1538
** w Wiki page
1539
** c Check-in comment
1540
** t Ticket text
1541
** e Tech note
1542
** f Forum
1543
**
1544
** rid The RID of an artifact that defines the object
1545
** being searched.
1546
**
1547
** zName Name of the object being searched. This is used
1548
** only to help figure out the mimetype (text/plain,
1549
** test/html, test/x-fossil-wiki, or text/x-markdown)
1550
** so that the code can know how to simplify the text.
1551
*/
1552
void search_stext(
1553
char cType, /* Type of document */
1554
int rid, /* BLOB.RID or TAG.TAGID value for document */
1555
const char *zName, /* Auxiliary information */
1556
Blob *pOut /* OUT: Initialize to the search text */
1557
){
1558
blob_init(pOut, 0, 0);
1559
switch( cType ){
1560
case 'd': { /* Documents */
1561
Blob doc;
1562
content_get(rid, &doc);
1563
blob_to_utf8_no_bom(&doc, 0);
1564
get_stext_by_mimetype(&doc, mimetype_from_name(zName), NULL, pOut);
1565
blob_reset(&doc);
1566
break;
1567
}
1568
case 'f': /* Forum messages */
1569
case 'e': /* Tech Notes */
1570
case 'w': { /* Wiki */
1571
Manifest *pWiki = manifest_get(rid,
1572
cType == 'e' ? CFTYPE_EVENT :
1573
cType == 'f' ? CFTYPE_FORUM : CFTYPE_WIKI, 0);
1574
Blob wiki;
1575
if( pWiki==0 ) break;
1576
if( cType=='f' ){
1577
blob_init(&wiki, 0, 0);
1578
if( pWiki->zThreadTitle ){
1579
blob_appendf(&wiki, "<h1>%h</h1>\n", pWiki->zThreadTitle);
1580
}
1581
blob_appendf(&wiki, "From %s:\n\n%s", pWiki->zUser, pWiki->zWiki);
1582
}else{
1583
blob_init(&wiki, pWiki->zWiki, -1);
1584
}
1585
get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
1586
cType=='w' ? pWiki->zWikiTitle : NULL, pOut);
1587
blob_reset(&wiki);
1588
manifest_destroy(pWiki);
1589
break;
1590
}
1591
case 'c': { /* Check-in Comments */
1592
static Stmt q;
1593
static int isPlainText = -1;
1594
db_static_prepare(&q,
1595
"SELECT coalesce(ecomment,comment)"
1596
" ||' (user: '||coalesce(euser,user,'?')"
1597
" ||', tags: '||"
1598
" (SELECT group_concat(substr(tag.tagname,5),',')"
1599
" FROM tag, tagxref"
1600
" WHERE tagname GLOB 'sym-*' AND tag.tagid=tagxref.tagid"
1601
" AND tagxref.rid=event.objid AND tagxref.tagtype>0)"
1602
" ||')'"
1603
" FROM event WHERE objid=:x AND type='ci'");
1604
if( isPlainText<0 ){
1605
isPlainText = db_get_boolean("timeline-plaintext",0);
1606
}
1607
db_bind_int(&q, ":x", rid);
1608
if( db_step(&q)==SQLITE_ROW ){
1609
blob_append(pOut, "\n", 1);
1610
if( isPlainText ){
1611
db_column_blob(&q, 0, pOut);
1612
}else{
1613
Blob x;
1614
blob_init(&x,0,0);
1615
db_column_blob(&q, 0, &x);
1616
get_stext_by_mimetype(&x, "text/x-fossil-wiki", NULL, pOut);
1617
blob_reset(&x);
1618
}
1619
}
1620
db_reset(&q);
1621
break;
1622
}
1623
case 't': { /* Tickets */
1624
static Stmt q1;
1625
static int iTitle = -1;
1626
db_static_prepare(&q1, "SELECT * FROM ticket WHERE tkt_id=:rid");
1627
db_bind_int(&q1, ":rid", rid);
1628
if( db_step(&q1)==SQLITE_ROW ){
1629
if( iTitle<0 ){
1630
int n = db_column_count(&q1);
1631
for(iTitle=0; iTitle<n; iTitle++){
1632
if( fossil_stricmp(db_column_name(&q1,iTitle),"title")==0 ) break;
1633
}
1634
}
1635
append_all_ticket_fields(pOut, &q1, iTitle);
1636
}
1637
db_reset(&q1);
1638
if( db_table_exists("repository","ticketchng") ){
1639
static Stmt q2;
1640
db_static_prepare(&q2, "SELECT * FROM ticketchng WHERE tkt_id=:rid"
1641
" ORDER BY tkt_mtime");
1642
db_bind_int(&q2, ":rid", rid);
1643
while( db_step(&q2)==SQLITE_ROW ){
1644
append_all_ticket_fields(pOut, &q2, -1);
1645
}
1646
db_reset(&q2);
1647
}
1648
break;
1649
}
1650
}
1651
}
1652
1653
/*
1654
** This routine is a wrapper around search_stext().
1655
**
1656
** This routine looks up the search text, stores it in an internal
1657
** buffer, and returns a pointer to the text. Subsequent requests
1658
** for the same document return the same pointer. The returned pointer
1659
** is valid until the next invocation of this routine. Call this routine
1660
** with an eType of 0 to clear the cache.
1661
*/
1662
char *search_stext_cached(
1663
char cType, /* Type of document */
1664
int rid, /* BLOB.RID or TAG.TAGID value for document */
1665
const char *zName, /* Auxiliary information, for mimetype */
1666
int *pnTitle /* OUT: length of title in bytes excluding \n */
1667
){
1668
static struct {
1669
Blob stext; /* Cached search text */
1670
char cType; /* The type */
1671
int rid; /* The RID */
1672
int nTitle; /* Number of bytes in title */
1673
} cache;
1674
int i;
1675
char *z;
1676
if( cType!=cache.cType || rid!=cache.rid ){
1677
if( cache.rid>0 ){
1678
blob_reset(&cache.stext);
1679
}else{
1680
blob_init(&cache.stext,0,0);
1681
}
1682
cache.cType = cType;
1683
cache.rid = rid;
1684
if( cType==0 ) return 0;
1685
search_stext(cType, rid, zName, &cache.stext);
1686
z = blob_str(&cache.stext);
1687
for(i=0; z[i] && z[i]!='\n'; i++){}
1688
cache.nTitle = i;
1689
}
1690
if( pnTitle ) *pnTitle = cache.nTitle;
1691
return blob_str(&cache.stext);
1692
}
1693
1694
/*
1695
** COMMAND: test-search-stext
1696
**
1697
** Usage: fossil test-search-stext TYPE RID NAME
1698
**
1699
** Compute the search text for document TYPE-RID whose name is NAME.
1700
** The TYPE is one of "c", "d", "t", "w", "e", or "f". The RID is the document
1701
** ID. The NAME is used to figure out a mimetype to use for formatting
1702
** the raw document text.
1703
*/
1704
void test_search_stext(void){
1705
Blob out;
1706
db_find_and_open_repository(0,0);
1707
if( g.argc!=5 ) usage("TYPE RID NAME");
1708
search_stext(g.argv[2][0], atoi(g.argv[3]), g.argv[4], &out);
1709
fossil_print("%s\n",blob_str(&out));
1710
blob_reset(&out);
1711
}
1712
1713
/*
1714
** COMMAND: test-convert-stext
1715
**
1716
** Usage: fossil test-convert-stext FILE MIMETYPE
1717
**
1718
** Read the content of FILE and convert it to stext according to MIMETYPE.
1719
** Send the result to standard output.
1720
*/
1721
void test_convert_stext(void){
1722
Blob in, out;
1723
db_find_and_open_repository(0,0);
1724
if( g.argc!=4 ) usage("FILENAME MIMETYPE");
1725
blob_read_from_file(&in, g.argv[2], ExtFILE);
1726
blob_init(&out, 0, 0);
1727
get_stext_by_mimetype(&in, g.argv[3], NULL, &out);
1728
fossil_print("%s\n",blob_str(&out));
1729
blob_reset(&in);
1730
blob_reset(&out);
1731
}
1732
1733
/*
1734
** The schema for the full-text index. The %s part must be an empty
1735
** string or a comma followed by additional flags for the FTS virtual
1736
** table.
1737
*/
1738
static const char zFtsSchema[] =
1739
@ -- One entry for each possible search result
1740
@ CREATE TABLE IF NOT EXISTS repository.ftsdocs(
1741
@ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.rowid
1742
@ type CHAR(1), -- Type of document
1743
@ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document
1744
@ name TEXT, -- Additional document description
1745
@ idxed BOOLEAN, -- True if currently in the index
1746
@ label TEXT, -- Label to print on search results
1747
@ url TEXT, -- URL to access this document
1748
@ mtime DATE, -- Date when document created
1749
@ bx TEXT, -- Temporary "body" content cache
1750
@ UNIQUE(type,rid)
1751
@ );
1752
@ CREATE INDEX repository.ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
1753
@ CREATE INDEX repository.ftsdocName ON ftsdocs(name) WHERE type='w';
1754
@ CREATE VIEW IF NOT EXISTS repository.ftscontent AS
1755
@ SELECT rowid, type, rid, name, idxed, label, url, mtime,
1756
@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body'
1757
@ FROM ftsdocs;
1758
@ CREATE VIRTUAL TABLE IF NOT EXISTS repository.ftsidx
1759
@ USING fts5(content="ftscontent", title, body%s);
1760
;
1761
static const char zFtsDrop[] =
1762
@ DROP TABLE IF EXISTS repository.ftsidx;
1763
@ DROP VIEW IF EXISTS repository.ftscontent;
1764
@ DROP TABLE IF EXISTS repository.ftsdocs;
1765
@ DROP TABLE IF EXISTS repository.chatfts1;
1766
;
1767
1768
#if INTERFACE
/*
** Tokenizer codes.  search_tokenizer_type() derives one of these from
** the search-tokenizer config option.
*/
#define FTS5TOK_NONE      0   /* disabled */
#define FTS5TOK_PORTER    1   /* porter stemmer */
#define FTS5TOK_UNICODE61 2   /* unicode61 tokenizer */
#define FTS5TOK_TRIGRAM   3   /* trigram tokenizer */
#endif
1777
1778
/*
1779
** Cached FTS5TOK_xyz value for search_tokenizer_type() and
1780
** friends.
1781
*/
1782
static int iFtsTokenizer = -1;
1783
1784
/*
1785
** Returns one of the FTS5TOK_xyz values, depending on the value of
1786
** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The
1787
** result of the first call is cached for subsequent calls unless
1788
** bRecheck is true.
1789
*/
1790
int search_tokenizer_type(int bRecheck){
1791
char *z;
1792
if( iFtsTokenizer>=0 && bRecheck==0 ){
1793
return iFtsTokenizer;
1794
}
1795
z = db_get("search-tokenizer",0);
1796
if( 0==z ){
1797
iFtsTokenizer = FTS5TOK_NONE;
1798
}else if(0==fossil_strcmp(z,"porter")){
1799
iFtsTokenizer = FTS5TOK_PORTER;
1800
}else if(0==fossil_strcmp(z,"unicode61")){
1801
iFtsTokenizer = FTS5TOK_UNICODE61;
1802
}else if(0==fossil_strcmp(z,"trigram")){
1803
iFtsTokenizer = FTS5TOK_TRIGRAM;
1804
}else{
1805
iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
1806
}
1807
fossil_free(z);
1808
return iFtsTokenizer;
1809
}
1810
1811
/*
1812
** Returns a string in the form ",tokenize=X", where X is the string
1813
** counterpart of the given FTS5TOK_xyz value. Returns "" if tokType
1814
** does not correspond to a known FTS5 tokenizer.
1815
*/
1816
const char * search_tokenize_arg_for_type(int tokType){
1817
switch( tokType ){
1818
case FTS5TOK_PORTER: return ",tokenize=porter";
1819
case FTS5TOK_UNICODE61: return ",tokenize=unicode61";
1820
case FTS5TOK_TRIGRAM: return ",tokenize=trigram";
1821
case FTS5TOK_NONE:
1822
default: return "";
1823
}
1824
}
1825
1826
/*
** Return a string suitable for use as the value of the search-tokenizer
** setting, derived from z.  If z is 0 then the current search-tokenizer
** value is used as the basis for formulating the result (which may
** differ from the current value but will have the same meaning).
**
** The result is one of the constants "porter", "unicode61", "trigram"
** or "off".  A value that is none of the three tokenizer names maps to
** "porter" if is_truth() accepts it, and to "off" otherwise.
*/
const char *search_tokenizer_for_string(const char *z){
  static const char *const azName[] = { "porter", "unicode61", "trigram" };
  char *zOwned = 0;       /* Setting value fetched here, to be freed */
  const char *zResult = 0;
  int k;

  if( z==0 ){
    z = zOwned = db_get("search-tokenizer",0);
  }
  if( z==0 ){
    zResult = "off";
  }else{
    for(k=0; k<(int)(sizeof(azName)/sizeof(azName[0])); k++){
      if( fossil_strcmp(z,azName[k])==0 ){
        zResult = azName[k];
        break;
      }
    }
    if( zResult==0 ){
      zResult = is_truth(z) ? "porter" : "off";
    }
  }
  fossil_free(zOwned);
  return zResult;
}
1855
1856
/*
1857
** Sets the search-tokenizer config setting to the value of
1858
** search_tokenizer_for_string(zName).
1859
*/
1860
void search_set_tokenizer(const char *zName){
1861
db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0);
1862
iFtsTokenizer = -1;
1863
}
1864
1865
/*
1866
** Create or drop the tables associated with a full-text index.
1867
*/
1868
static int searchIdxExists = -1;
1869
void search_create_index(void){
1870
const char *zExtra =
1871
search_tokenize_arg_for_type(search_tokenizer_type(0));
1872
assert( zExtra );
1873
search_sql_setup(g.db);
1874
db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/);
1875
searchIdxExists = 1;
1876
}
1877
void search_drop_index(void){
1878
db_multi_exec(zFtsDrop/*works-like:""*/);
1879
searchIdxExists = 0;
1880
}
1881
1882
/*
1883
** Return true if the full-text search index exists. See also the
1884
** search_index_type() function.
1885
*/
1886
int search_index_exists(void){
1887
if( searchIdxExists<0 ){
1888
searchIdxExists = db_table_exists("repository","ftsdocs");
1889
}
1890
return searchIdxExists;
1891
}
1892
1893
/*
** Determine which full-text search index is currently being used to
** implement searching.  Return values:
**
**     0          No search index is available
**     4          FTS3/4
**     5          FTS5
**
** The type is deduced from the schema text of the "ftsidx" table: 4 if
** that text contains "fts4", otherwise 5.
**
** Results are cached.  Make the argument 1 to reset the cache.  See
** also the search_index_exists() routine.
*/
int search_index_type(int bReset){
  static int idxType = -1;     /* Cached result.  Negative means unknown */
  if( bReset || idxType<0 ){
    idxType = db_int(0,
      "SELECT CASE WHEN sql GLOB '*fts4*' THEN 4 ELSE 5 END"
      " FROM repository.sqlite_schema WHERE name='ftsidx'"
    );
  }
  return idxType;
}
1914
1915
/*
1916
** Fill the FTSDOCS table with unindexed entries for everything
1917
** in the repository. This uses INSERT OR IGNORE so entries already
1918
** in FTSDOCS are unchanged.
1919
*/
1920
void search_fill_index(void){
1921
if( !search_index_exists() ) return;
1922
search_sql_setup(g.db);
1923
db_multi_exec(
1924
"INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
1925
" SELECT 'c', objid, 0 FROM event WHERE type='ci';"
1926
);
1927
db_multi_exec(
1928
"WITH latest_wiki(rid,name,mtime) AS ("
1929
" SELECT tagxref.rid, substr(tag.tagname,6), max(tagxref.mtime)"
1930
" FROM tag, tagxref"
1931
" WHERE tag.tagname GLOB 'wiki-*'"
1932
" AND tagxref.tagid=tag.tagid"
1933
" AND tagxref.value>0"
1934
" GROUP BY 2"
1935
") INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed)"
1936
" SELECT 'w', rid, name, 0 FROM latest_wiki;"
1937
);
1938
db_multi_exec(
1939
"INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
1940
" SELECT 't', tkt_id, 0 FROM ticket;"
1941
);
1942
db_multi_exec(
1943
"INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed)"
1944
" SELECT type, objid, comment, 0 FROM event WHERE type IN ('e','f');"
1945
);
1946
}
1947
1948
/*
1949
** The document described by cType,rid,zName is about to be added or
1950
** updated. If the document has already been indexed, then unindex it
1951
** now while we still have access to the old content. Add the document
1952
** to the queue of documents that need to be indexed or reindexed.
1953
*/
1954
void search_doc_touch(char cType, int rid, const char *zName){
1955
if( search_index_exists() && !content_is_private(rid) ){
1956
char zType[2];
1957
zType[0] = cType;
1958
zType[1] = 0;
1959
search_sql_setup(g.db);
1960
db_multi_exec(
1961
"DELETE FROM ftsidx WHERE rowid IN"
1962
" (SELECT rowid FROM ftsdocs WHERE type=%Q AND rid=%d AND idxed)",
1963
zType, rid
1964
);
1965
db_multi_exec(
1966
"REPLACE INTO ftsdocs(type,rid,name,idxed)"
1967
" VALUES(%Q,%d,%Q,0)",
1968
zType, rid, zName
1969
);
1970
if( cType=='w' || cType=='e' ){
1971
db_multi_exec(
1972
"DELETE FROM ftsidx WHERE rowid IN"
1973
" (SELECT rowid FROM ftsdocs WHERE type='%c' AND name=%Q AND idxed)",
1974
cType, zName
1975
);
1976
db_multi_exec(
1977
"DELETE FROM ftsdocs WHERE type='%c' AND name=%Q AND rid!=%d",
1978
cType, zName, rid
1979
);
1980
}
1981
/* All forum posts are always indexed */
1982
}
1983
}
1984
1985
/*
1986
** If the doc-glob and doc-br settings are valid for document search
1987
** and if the latest check-in on doc-br is in the unindexed set of
1988
** check-ins, then update all 'd' entries in FTSDOCS that have
1989
** changed.
1990
*/
1991
static void search_update_doc_index(void){
1992
const char *zMainBranch = db_main_branch();
1993
const char *zDocBranches = db_get("doc-branch", zMainBranch);
1994
int i;
1995
Glob * pGlob = glob_create(zDocBranches)
1996
/* We're misusing a Glob as a list of comma-/space-delimited
1997
** tokens. We're not actually doing glob matches here. */;
1998
if( !pGlob ) return;
1999
db_multi_exec(
2000
"CREATE TEMP TABLE current_docs(rid INTEGER PRIMARY KEY, name);"
2001
"CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
2002
);
2003
for( i = 0; i < pGlob->nPattern; ++i ){
2004
const char *zDocBr = pGlob->azPattern[i];
2005
int ckid = symbolic_name_to_rid(zDocBr,"ci");
2006
double rTime;
2007
if( !db_exists("SELECT 1 FROM ftsdocs WHERE type='c' AND rid=%d"
2008
" AND NOT idxed", ckid) ) continue;
2009
/* If we get this far, it means that changes to 'd' entries are
2010
** required. */
2011
rTime = db_double(0.0, "SELECT mtime FROM event WHERE objid=%d", ckid);
2012
db_multi_exec(
2013
"INSERT OR IGNORE INTO current_docs(rid, name)"
2014
" SELECT blob.rid, foci.filename FROM foci, blob"
2015
" WHERE foci.checkinID=%d AND blob.uuid=foci.uuid"
2016
" AND %z",
2017
ckid, glob_expr("foci.filename", db_get("doc-glob",""))
2018
);
2019
db_multi_exec(
2020
"DELETE FROM ftsidx WHERE rowid IN"
2021
" (SELECT rowid FROM ftsdocs WHERE type='d'"
2022
" AND rid NOT IN (SELECT rid FROM current_docs))"
2023
);
2024
db_multi_exec(
2025
"DELETE FROM ftsdocs WHERE type='d'"
2026
" AND rid NOT IN (SELECT rid FROM current_docs)"
2027
);
2028
db_multi_exec(
2029
"INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)"
2030
" SELECT 'd', rid, name, 0,"
2031
" title('d',rid,name),"
2032
" body('d',rid,name),"
2033
" printf('/doc/%T/%%s',urlencode(name)),"
2034
" %.17g"
2035
" FROM current_docs",
2036
zDocBr, rTime
2037
);
2038
db_multi_exec(
2039
"INSERT INTO ftsidx(rowid,title,body)"
2040
" SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed"
2041
);
2042
db_multi_exec(
2043
"UPDATE ftsdocs SET"
2044
" idxed=1,"
2045
" bx=NULL,"
2046
" label='Document: '||label"
2047
" WHERE type='d' AND NOT idxed"
2048
);
2049
}
2050
glob_free(pGlob);
2051
}
2052
2053
/*
** Index every check-in ('c') entry of FTSDOCS that has idxed=0.
**
** Step 1 copies the body text of each such entry into FTSIDX with an
** empty title.  Step 2 marks the entries as indexed, clears their name
** and fills in label, url and mtime from the EVENT and BLOB tables.
*/
static void search_update_checkin_index(void){
  /* Step 1: add the text to the full-text index */
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
    " WHERE type='c' AND NOT idxed;"
  );

  /* Step 2: finalize the FTSDOCS rows */
  db_multi_exec(
    "UPDATE ftsdocs SET idxed=1, name=NULL,"
    " (label,url,mtime) = "
    " (SELECT printf('Check-in [%%.16s] on %%s',blob.uuid,"
    " datetime(event.mtime)),"
    " printf('/timeline?y=ci&c=%%.20s',blob.uuid),"
    " event.mtime"
    " FROM event, blob"
    " WHERE event.objid=ftsdocs.rid"
    " AND blob.rid=ftsdocs.rid)"
    "WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed"
  );
}
2075
2076
/*
** Index every ticket ('t') entry of FTSDOCS that has idxed=0.
**
** The title and body of each such ticket are added to FTSIDX.  If that
** added anything, the FTSDOCS rows are marked as indexed, their name is
** cleared, and label, url and mtime are filled in from the TICKET
** table.
*/
static void search_update_ticket_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs"
    " WHERE type='t' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    /* At least one ticket was added to the index */
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1, name=NULL,"
      " (label,url,mtime) ="
      " (SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,null),"
      " datetime(tkt_mtime)),"
      " printf('/tktview/%%.20s',tkt_uuid),"
      " tkt_mtime"
      " FROM ticket"
      " WHERE tkt_id=ftsdocs.rid)"
      "WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
    );
  }
}
2098
2099
/*
** Index every wiki ('w') entry of FTSDOCS that has idxed=0.
**
** The title and body of each such page are added to FTSIDX.  If that
** added anything, the FTSDOCS rows are marked as indexed and their
** label, url and mtime are derived from the page name and the TAGXREF
** table.  The name column keeps its value.
*/
static void search_update_wiki_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs"
    " WHERE type='w' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    /* At least one wiki page was added to the index */
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1,"
      " (name,label,url,mtime) = "
      " (SELECT ftsdocs.name,"
      " 'Wiki: '||ftsdocs.name,"
      " '/wiki?name='||urlencode(ftsdocs.name),"
      " tagxref.mtime"
      " FROM tagxref WHERE tagxref.rid=ftsdocs.rid)"
      " WHERE ftsdocs.type='w' AND NOT ftsdocs.idxed"
    );
  }
}
2120
2121
/*
** Index every forum post ('f') entry of FTSDOCS that has idxed=0.
**
** The title and body of each such post are added to FTSIDX.  If that
** added anything, the FTSDOCS rows are marked as indexed, their name is
** cleared, and label, url and mtime are filled in from the EVENT and
** BLOB tables.
*/
static void search_update_forum_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('f',rid,NULL),body('f',rid,NULL) FROM ftsdocs"
    " WHERE type='f' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    /* At least one forum post was added to the index */
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1, name=NULL,"
      " (label,url,mtime) = "
      " (SELECT 'Forum '||event.comment,"
      " '/forumpost/'||blob.uuid,"
      " event.mtime"
      " FROM event, blob"
      " WHERE event.objid=ftsdocs.rid"
      " AND blob.rid=ftsdocs.rid)"
      "WHERE ftsdocs.type='f' AND NOT ftsdocs.idxed"
    );
  }
}
2143
2144
/*
** Index every tech note ('e') entry of FTSDOCS that has idxed=0.
**
** The title and body of each such note are added to FTSIDX.  If that
** added anything, the FTSDOCS rows are marked as indexed and their
** label, url and mtime are derived from the entry name and from the
** 'event-*' tag attached to the artifact.  The name column keeps its
** value.
*/
static void search_update_technote_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('e',rid,NULL),body('e',rid,NULL) FROM ftsdocs"
    " WHERE type='e' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    /* At least one tech note was added to the index */
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1,"
      " (name,label,url,mtime) = "
      " (SELECT ftsdocs.name,"
      " 'Tech Note: '||ftsdocs.name,"
      " '/technote/'||substr(tag.tagname,7),"
      " tagxref.mtime"
      " FROM tagxref, tag USING (tagid)"
      " WHERE tagxref.rid=ftsdocs.rid"
      " AND tagname GLOB 'event-*')"
      " WHERE ftsdocs.type='e' AND NOT ftsdocs.idxed"
    );
  }
}
2167
2168
/*
2169
** Deal with all of the unindexed entries in the FTSDOCS table - that
2170
** is to say, all the entries with FTSDOCS.IDXED=0. Add them to the
2171
** index.
2172
*/
2173
void search_update_index(unsigned int srchFlags){
2174
if( !search_index_exists() ) return;
2175
if( !db_exists("SELECT 1 FROM ftsdocs WHERE NOT idxed") ) return;
2176
search_sql_setup(g.db);
2177
db_unprotect(PROTECT_READONLY);
2178
if( srchFlags & (SRCH_CKIN|SRCH_DOC) ){
2179
search_update_doc_index();
2180
search_update_checkin_index();
2181
}
2182
if( srchFlags & SRCH_TKT ){
2183
search_update_ticket_index();
2184
}
2185
if( srchFlags & SRCH_WIKI ){
2186
search_update_wiki_index();
2187
}
2188
if( srchFlags & SRCH_TECHNOTE ){
2189
search_update_technote_index();
2190
}
2191
if( srchFlags & SRCH_FORUM ){
2192
search_update_forum_index();
2193
}
2194
db_protect_pop();
2195
}
2196
2197
/*
2198
** Construct, prepopulate, and then update the full-text index.
2199
*/
2200
void search_rebuild_index(void){
2201
fossil_print("rebuilding the search index...");
2202
fflush(stdout);
2203
search_create_index();
2204
search_fill_index();
2205
search_update_index(search_restrict(SRCH_ALL));
2206
if( db_table_exists("repository","chat") ){
2207
chat_rebuild_index(1);
2208
}
2209
fossil_print(" done\n");
2210
}
2211
2212
/*
2213
** COMMAND: fts-config* abbrv-subcom
2214
**
2215
** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT?
2216
**
2217
** The "fossil fts-config" command configures the full-text search capabilities
2218
** of the repository. Subcommands:
2219
**
2220
** reindex Rebuild the search index. This is a no-op if
2221
** index search is disabled
2222
**
2223
** index (on|off) Turn the search index on or off
2224
**
2225
** enable TYPE .. Enable search for TYPE. TYPE is one of:
2226
** check-in, document, ticket, wiki, technote,
2227
** forum, help, or all
2228
**
2229
** disable TYPE ... Disable search for TYPE
2230
**
2231
** tokenizer VALUE Select a tokenizer for indexed search. VALUE
2232
** may be one of (porter, on, off, trigram, unicode61),
2233
** and "on" is equivalent to "porter". Unindexed
2234
** search never uses tokenization or stemming.
2235
**
2236
** The current search settings are displayed after any changes are applied.
2237
** Run this command with no arguments to simply see the settings.
2238
*/
2239
void fts_config_cmd(void){
2240
static const struct {
2241
int iCmd;
2242
const char *z;
2243
} aCmd[] = {
2244
{ 1, "reindex" },
2245
{ 2, "index" },
2246
{ 3, "disable" },
2247
{ 4, "enable" },
2248
{ 5, "tokenizer"},
2249
};
2250
static const struct {
2251
const char *zSetting;
2252
const char *zName;
2253
const char *zSw;
2254
} aSetng[] = {
2255
{ "search-ci", "check-in search:", "c" },
2256
{ "search-doc", "document search:", "d" },
2257
{ "search-tkt", "ticket search:", "t" },
2258
{ "search-wiki", "wiki search:", "w" },
2259
{ "search-technote", "technote search:", "e" },
2260
{ "search-forum", "forum search:", "f" },
2261
{ "search-help", "built-in help search:", "h" },
2262
};
2263
char *zSubCmd = 0;
2264
int i, j, n;
2265
int iCmd = 0;
2266
int iAction = 0;
2267
db_find_and_open_repository(0, 0);
2268
if( g.argc>2 ){
2269
zSubCmd = g.argv[2];
2270
n = (int)strlen(zSubCmd);
2271
for(i=0; i<count(aCmd); i++){
2272
if( fossil_strncmp(aCmd[i].z, zSubCmd, n)==0 ) break;
2273
}
2274
if( i>=count(aCmd) ){
2275
Blob all;
2276
blob_init(&all,0,0);
2277
for(i=0; i<count(aCmd); i++) blob_appendf(&all, " %s", aCmd[i].z);
2278
fossil_fatal("unknown \"%s\" - should be one of:%s",
2279
zSubCmd, blob_str(&all));
2280
return;
2281
}
2282
iCmd = aCmd[i].iCmd;
2283
}
2284
g.perm.Read = 1;
2285
g.perm.RdTkt = 1;
2286
g.perm.RdWiki = 1;
2287
if( iCmd==1 ){
2288
if( search_index_exists() ) iAction = 2;
2289
}
2290
if( iCmd==2 ){
2291
if( g.argc<3 ) usage("index (on|off)");
2292
iAction = 1 + is_truth(g.argv[3]);
2293
}
2294
db_begin_transaction();
2295
2296
/* Adjust search settings */
2297
if( iCmd==3 || iCmd==4 ){
2298
int k;
2299
const char *zCtrl;
2300
for(k=2; k<g.argc; k++){
2301
if( k==2 ){
2302
if( g.argc<4 ){
2303
zCtrl = "all";
2304
}else{
2305
zCtrl = g.argv[3];
2306
k++;
2307
}
2308
}else{
2309
zCtrl = g.argv[k];
2310
}
2311
if( fossil_strcmp(zCtrl,"all")==0 ){
2312
zCtrl = "cdtwefh";
2313
}
2314
if( strlen(zCtrl)>=4 ){
2315
/* If the argument to "enable" or "disable" is a string of at least
2316
** 4 characters which matches part of any aSetng.zName, then use that
2317
** one aSetng value only. */
2318
char *zGlob = mprintf("*%s*", zCtrl);
2319
for(j=0; j<count(aSetng); j++){
2320
if( sqlite3_strglob(zGlob, aSetng[j].zName)==0 ){
2321
db_set_int(aSetng[j].zSetting/*works-like:"x"*/, iCmd-3, 0);
2322
zCtrl = 0;
2323
break;
2324
}
2325
}
2326
fossil_free(zGlob);
2327
}
2328
if( zCtrl ){
2329
for(j=0; j<count(aSetng); j++){
2330
if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){
2331
db_set_int(aSetng[j].zSetting/*works-like:"x"*/, iCmd-3, 0);
2332
}
2333
}
2334
}
2335
}
2336
}else if( iCmd==5 ){
2337
int iOldTokenizer, iNewTokenizer;
2338
if( g.argc<4 ) usage("tokenizer porter|on|off|trigram|unicode61");
2339
iOldTokenizer = search_tokenizer_type(0);
2340
db_set("search-tokenizer",
2341
search_tokenizer_for_string(g.argv[3]), 0);
2342
iNewTokenizer = search_tokenizer_type(1);
2343
if( iOldTokenizer!=iNewTokenizer ){
2344
/* Drop or rebuild index if tokenizer changes. */
2345
iAction = 1 + ((iOldTokenizer && iNewTokenizer)
2346
? 1 : (iNewTokenizer ? 1 : 0));
2347
}
2348
}
2349
2350
/* destroy or rebuild the index, if requested */
2351
if( iAction>=1 ){
2352
search_drop_index();
2353
}
2354
if( iAction>=2 ){
2355
search_rebuild_index();
2356
}
2357
2358
/* Always show the status before ending */
2359
for(i=0; i<count(aSetng); i++){
2360
fossil_print("%-21s %s\n", aSetng[i].zName,
2361
db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
2362
}
2363
fossil_print("%-21s %s\n", "tokenizer:",
2364
search_tokenizer_for_string(0));
2365
if( search_index_exists() ){
2366
int pgsz = db_int64(0, "PRAGMA repository.page_size;");
2367
i64 nTotal = db_int64(0, "PRAGMA repository.page_count;")*pgsz;
2368
i64 nFts = db_int64(0, "SELECT count(*) FROM dbstat"
2369
" WHERE schema='repository'"
2370
" AND name LIKE 'fts%%'")*pgsz;
2371
char zSize[50];
2372
fossil_print("%-21s FTS%d\n", "full-text index:", search_index_type(1));
2373
fossil_print("%-21s %d\n", "documents:",
2374
db_int(0, "SELECT count(*) FROM ftsdocs"));
2375
approxSizeName(sizeof(zSize), zSize, nFts);
2376
fossil_print("%-21s %s (%.1f%% of repository)\n", "space used",
2377
zSize, 100.0*((double)nFts/(double)nTotal));
2378
}else{
2379
fossil_print("%-21s disabled\n", "full-text index:");
2380
}
2381
db_end_transaction(0);
2382
}
2383
2384
/*
2385
** WEBPAGE: test-ftsdocs
2386
**
2387
** Show a table of all documents currently in the search index.
2388
*/
2389
void search_data_page(void){
2390
Stmt q;
2391
const char *zId = P("id");
2392
const char *zType = P("y");
2393
const char *zIdxed = P("ixed");
2394
int id;
2395
int cnt1 = 0, cnt2 = 0, cnt3 = 0;
2396
login_check_credentials();
2397
if( !g.perm.Admin ){ login_needed(0); return; }
2398
style_set_current_feature("test");
2399
if( !search_index_exists() ){
2400
@ <p>Indexed search is disabled
2401
style_finish_page();
2402
return;
2403
}
2404
search_sql_setup(g.db);
2405
style_submenu_element("Setup","%R/srchsetup");
2406
if( zId!=0 && (id = atoi(zId))>0 ){
2407
/* Show information about a single ftsdocs entry */
2408
style_header("Information about ftsdoc entry %d", id);
2409
style_submenu_element("Summary","%R/test-ftsdocs");
2410
db_prepare(&q,
2411
"SELECT type||rid, name, idxed, label, url, datetime(mtime)"
2412
" FROM ftsdocs WHERE rowid=%d", id
2413
);
2414
if( db_step(&q)==SQLITE_ROW ){
2415
const char *zUrl = db_column_text(&q,4);
2416
const char *zDocId = db_column_text(&q,0);
2417
char *zName;
2418
char *z;
2419
@ <table border=0>
2420
@ <tr><td align='right'>rowid:<td>&nbsp;&nbsp;<td>%d(id)
2421
@ <tr><td align='right'>id:<td><td>%s(zDocId)
2422
@ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1))
2423
@ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2))
2424
@ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3))
2425
@ <tr><td align='right'>url:<td><td>
2426
@ <a href='%R%s(zUrl)'>%h(zUrl)</a>
2427
@ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5))
2428
z = db_text(0, "SELECT title FROM ftsidx WHERE rowid=%d",id);
2429
if( z && z[0] ){
2430
@ <tr><td align="right">title:<td><td>%h(z)
2431
fossil_free(z);
2432
}
2433
z = db_text(0, "SELECT body FROM ftsidx WHERE rowid=%d",id);
2434
if( z && z[0] ){
2435
@ <tr><td align="right" valign="top">body:<td><td>%h(z)
2436
fossil_free(z);
2437
}
2438
@ </table>
2439
zName = mprintf("Indexed '%c' docs",zDocId[0]);
2440
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=1",zDocId[0]);
2441
zName = mprintf("Unindexed '%c' docs",zDocId[0]);
2442
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=0",zDocId[0]);
2443
}
2444
db_finalize(&q);
2445
style_finish_page();
2446
return;
2447
}
2448
if( zType!=0 && zType[0]!=0 && zType[1]==0 &&
2449
zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0
2450
){
2451
int ixed = zIdxed[0]=='1';
2452
char *zName;
2453
style_header("List of '%c' documents that are%s indexed",
2454
zType[0], ixed ? "" : " not");
2455
style_submenu_element("Summary","%R/test-ftsdocs");
2456
if( ixed==0 ){
2457
zName = mprintf("Indexed '%c' docs",zType[0]);
2458
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=1",zType[0]);
2459
}else{
2460
zName = mprintf("Unindexed '%c' docs",zType[0]);
2461
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=0",zType[0]);
2462
}
2463
db_prepare(&q,
2464
"SELECT rowid, type||rid ||' '|| coalesce(label,'')"
2465
" FROM ftsdocs WHERE type='%c' AND %s idxed",
2466
zType[0], ixed ? "" : "NOT"
2467
);
2468
@ <ul>
2469
while( db_step(&q)==SQLITE_ROW ){
2470
@ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'>
2471
@ %h(db_column_text(&q,1))</a>
2472
}
2473
@ </ul>
2474
db_finalize(&q);
2475
style_finish_page();
2476
return;
2477
}
2478
style_header("Summary of ftsdocs");
2479
db_prepare(&q,
2480
"SELECT type, sum(idxed IS TRUE), sum(idxed IS FALSE), count(*)"
2481
" FROM ftsdocs"
2482
" GROUP BY 1 ORDER BY 4 DESC"
2483
);
2484
@ <table border=1 cellpadding=3 cellspacing=0>
2485
@ <thead>
2486
@ <tr><th>Type<th>Indexed<th>Unindexed<th>Total
2487
@ </thead>
2488
@ <tbody>
2489
while( db_step(&q)==SQLITE_ROW ){
2490
const char *zType = db_column_text(&q,0);
2491
int nIndexed = db_column_int(&q, 1);
2492
int nUnindexed = db_column_int(&q, 2);
2493
int nTotal = db_column_int(&q, 3);
2494
@ <tr><td>%h(zType)
2495
if( nIndexed>0 ){
2496
@ <td align="right"><a href='%R/test-ftsdocs?y=%s(zType)&ixed=1'>\
2497
@ %d(nIndexed)</a>
2498
}else{
2499
@ <td align="right">0
2500
}
2501
if( nUnindexed>0 ){
2502
@ <td align="right"><a href='%R/test-ftsdocs?y=%s(zType)&ixed=0'>\
2503
@ %d(nUnindexed)</a>
2504
}else{
2505
@ <td align="right">0
2506
}
2507
@ <td align="right">%d(nTotal)
2508
@ </tr>
2509
cnt1 += nIndexed;
2510
cnt2 += nUnindexed;
2511
cnt3 += nTotal;
2512
}
2513
db_finalize(&q);
2514
@ </tbody><tfooter>
2515
@ <tr><th>Total<th align="right">%d(cnt1)<th align="right">%d(cnt2)
2516
@ <th align="right">%d(cnt3)
2517
@ </tfooter>
2518
@ </table>
2519
style_finish_page();
2520
}
2521
2522
2523
/*
2524
** The Fts5MatchinfoCtx bits were all taken verbatim from:
2525
**
2526
** https://sqlite.org/src/finfo?name=ext/fts5/fts5_test_mi.c
2527
*/
2528
2529
typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
2530
2531
#if INTERFACE
2532
#ifndef SQLITE_AMALGAMATION
2533
typedef unsigned int u32;
2534
#endif
2535
#endif
2536
2537
struct Fts5MatchinfoCtx {
2538
int nCol; /* Number of cols in FTS5 table */
2539
int nPhrase; /* Number of phrases in FTS5 query */
2540
char *zArg; /* nul-term'd copy of 2nd arg */
2541
int nRet; /* Number of elements in aRet[] */
2542
u32 *aRet; /* Array of 32-bit unsigned ints to return */
2543
};
2544
2545
2546
/*
2547
** Return a pointer to the fts5_api pointer for database connection db.
2548
** If an error occurs, return NULL and leave an error in the database
2549
** handle (accessible using sqlite3_errcode()/errmsg()).
2550
*/
2551
static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){
2552
sqlite3_stmt *pStmt = 0;
2553
int rc;
2554
2555
*ppApi = 0;
2556
rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0);
2557
if( rc==SQLITE_OK ){
2558
sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0);
2559
(void)sqlite3_step(pStmt);
2560
rc = sqlite3_finalize(pStmt);
2561
}
2562
2563
return rc;
2564
}
2565
2566
2567
/*
** Report how many 32-bit integers the matchinfo() flag f contributes
** to the output array for a table of nCol columns and a query of
** nPhrase phrases.
**
** f is expected to be one of the flag characters that may appear in
** the second argument of matchinfo().  For any other character the
** return value is -1.
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  switch( f ){
    /* A single value */
    case 'p':
    case 'c':
    case 'n':
      return 1;

    /* One value per column */
    case 'a':
    case 'l':
    case 's':
      return nCol;

    /* One value per (phrase,column) pair */
    case 'y':
      return nCol * nPhrase;

    /* Three values per (phrase,column) pair */
    case 'x':
      return 3 * nCol * nPhrase;

    /* One bit per column, packed into 32-bit words, for each phrase */
    case 'b':
      return ((nCol + 31) / 32) * nPhrase;

    default:
      return -1;
  }
}
2589
2590
static int fts5MatchinfoIter(
2591
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
2592
Fts5Context *pFts, /* First arg to pass to pApi functions */
2593
Fts5MatchinfoCtx *p,
2594
int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
2595
){
2596
int i;
2597
int n = 0;
2598
int rc = SQLITE_OK;
2599
char f;
2600
for(i=0; (f = p->zArg[i]); i++){
2601
rc = x(pApi, pFts, p, f, &p->aRet[n]);
2602
if( rc!=SQLITE_OK ) break;
2603
n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
2604
}
2605
return rc;
2606
}
2607
2608
static int fts5MatchinfoXCb(
2609
const Fts5ExtensionApi *pApi,
2610
Fts5Context *pFts,
2611
void *pUserData
2612
){
2613
Fts5PhraseIter iter;
2614
int iCol, iOff;
2615
u32 *aOut = (u32*)pUserData;
2616
int iPrev = -1;
2617
2618
for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff);
2619
iCol>=0;
2620
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
2621
){
2622
aOut[iCol*3+1]++;
2623
if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
2624
iPrev = iCol;
2625
}
2626
2627
return SQLITE_OK;
2628
}
2629
2630
static int fts5MatchinfoGlobalCb(
2631
const Fts5ExtensionApi *pApi,
2632
Fts5Context *pFts,
2633
Fts5MatchinfoCtx *p,
2634
char f,
2635
u32 *aOut
2636
){
2637
int rc = SQLITE_OK;
2638
switch( f ){
2639
case 'p':
2640
aOut[0] = p->nPhrase;
2641
break;
2642
2643
case 'c':
2644
aOut[0] = p->nCol;
2645
break;
2646
2647
case 'x': {
2648
int i;
2649
for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
2650
void *pPtr = (void*)&aOut[i * p->nCol * 3];
2651
rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
2652
}
2653
break;
2654
}
2655
2656
case 'n': {
2657
sqlite3_int64 nRow;
2658
rc = pApi->xRowCount(pFts, &nRow);
2659
aOut[0] = (u32)nRow;
2660
break;
2661
}
2662
2663
case 'a': {
2664
sqlite3_int64 nRow = 0;
2665
rc = pApi->xRowCount(pFts, &nRow);
2666
if( nRow==0 ){
2667
memset(aOut, 0, sizeof(u32) * p->nCol);
2668
}else{
2669
int i;
2670
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
2671
sqlite3_int64 nToken;
2672
rc = pApi->xColumnTotalSize(pFts, i, &nToken);
2673
if( rc==SQLITE_OK){
2674
aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
2675
}
2676
}
2677
}
2678
break;
2679
}
2680
2681
}
2682
return rc;
2683
}
2684
2685
static int fts5MatchinfoLocalCb(
2686
const Fts5ExtensionApi *pApi,
2687
Fts5Context *pFts,
2688
Fts5MatchinfoCtx *p,
2689
char f,
2690
u32 *aOut
2691
){
2692
int i;
2693
int rc = SQLITE_OK;
2694
2695
switch( f ){
2696
case 'b': {
2697
int iPhrase;
2698
int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
2699
for(i=0; i<nInt; i++) aOut[i] = 0;
2700
2701
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
2702
Fts5PhraseIter iter;
2703
int iCol;
2704
for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
2705
iCol>=0;
2706
pApi->xPhraseNextColumn(pFts, &iter, &iCol)
2707
){
2708
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
2709
}
2710
}
2711
2712
break;
2713
}
2714
2715
case 'x':
2716
case 'y': {
2717
int nMul = (f=='x' ? 3 : 1);
2718
int iPhrase;
2719
2720
for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
2721
2722
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
2723
Fts5PhraseIter iter;
2724
int iOff, iCol;
2725
for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
2726
iOff>=0;
2727
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
2728
){
2729
aOut[nMul * (iCol + iPhrase * p->nCol)]++;
2730
}
2731
}
2732
2733
break;
2734
}
2735
2736
case 'l': {
2737
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
2738
int nToken;
2739
rc = pApi->xColumnSize(pFts, i, &nToken);
2740
aOut[i] = (u32)nToken;
2741
}
2742
break;
2743
}
2744
2745
case 's': {
2746
int nInst;
2747
2748
memset(aOut, 0, sizeof(u32) * p->nCol);
2749
2750
rc = pApi->xInstCount(pFts, &nInst);
2751
for(i=0; rc==SQLITE_OK && i<nInst; i++){
2752
int iPhrase, iOff, iCol = 0;
2753
int iNextPhrase;
2754
int iNextOff;
2755
u32 nSeq = 1;
2756
int j;
2757
2758
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
2759
iNextPhrase = iPhrase+1;
2760
iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
2761
for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
2762
int ip, ic, io;
2763
rc = pApi->xInst(pFts, j, &ip, &ic, &io);
2764
if( ic!=iCol || io>iNextOff ) break;
2765
if( ip==iNextPhrase && io==iNextOff ){
2766
nSeq++;
2767
iNextPhrase = ip+1;
2768
iNextOff = io + pApi->xPhraseSize(pFts, ip);
2769
}
2770
}
2771
2772
if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
2773
}
2774
2775
break;
2776
}
2777
}
2778
return rc;
2779
}
2780
2781
static Fts5MatchinfoCtx *fts5MatchinfoNew(
2782
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
2783
Fts5Context *pFts, /* First arg to pass to pApi functions */
2784
sqlite3_context *pCtx, /* Context for returning error message */
2785
const char *zArg /* Matchinfo flag string */
2786
){
2787
Fts5MatchinfoCtx *p;
2788
int nCol;
2789
int nPhrase;
2790
int i;
2791
int nInt;
2792
sqlite3_int64 nByte;
2793
int rc;
2794
2795
nCol = pApi->xColumnCount(pFts);
2796
nPhrase = pApi->xPhraseCount(pFts);
2797
2798
nInt = 0;
2799
for(i=0; zArg[i]; i++){
2800
int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
2801
if( n<0 ){
2802
char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
2803
sqlite3_result_error(pCtx, zErr, -1);
2804
sqlite3_free(zErr);
2805
return 0;
2806
}
2807
nInt += n;
2808
}
2809
2810
nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */
2811
+ sizeof(u32) * nInt /* The p->aRet[] array */
2812
+ (i+1); /* The p->zArg string */
2813
p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte);
2814
if( p==0 ){
2815
sqlite3_result_error_nomem(pCtx);
2816
return 0;
2817
}
2818
memset(p, 0, nByte);
2819
2820
p->nCol = nCol;
2821
p->nPhrase = nPhrase;
2822
p->aRet = (u32*)&p[1];
2823
p->nRet = nInt;
2824
p->zArg = (char*)&p->aRet[nInt];
2825
memcpy(p->zArg, zArg, i);
2826
2827
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
2828
if( rc!=SQLITE_OK ){
2829
sqlite3_result_error_code(pCtx, rc);
2830
sqlite3_free(p);
2831
p = 0;
2832
}
2833
2834
return p;
2835
}
2836
2837
static void fts5MatchinfoFunc(
2838
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
2839
Fts5Context *pFts, /* First arg to pass to pApi functions */
2840
sqlite3_context *pCtx, /* Context for returning result/error */
2841
int nVal,

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button