|
1
|
/* |
|
2
|
** Copyright (c) 2009 D. Richard Hipp |
|
3
|
** |
|
4
|
** This program is free software; you can redistribute it and/or |
|
5
|
** modify it under the terms of the Simplified BSD License (also |
|
6
|
** known as the "2-Clause License" or "FreeBSD License".) |
|
7
|
|
|
8
|
** This program is distributed in the hope that it will be useful, |
|
9
|
** but without any warranty; without even the implied warranty of |
|
10
|
** merchantability or fitness for a particular purpose. |
|
11
|
** |
|
12
|
** Author contact information: |
|
13
|
** [email protected] |
|
14
|
** http://www.hwaci.com/drh/ |
|
15
|
** |
|
16
|
******************************************************************************* |
|
17
|
** |
|
18
|
** This file contains code to implement search functions |
|
19
|
** against timeline comments, check-in content, wiki pages, tickets, |
|
20
|
** and/or forum posts. |
|
21
|
** |
|
22
|
** The search can be either a per-query "grep"-like search that scans |
|
23
|
** the entire corpus. Or it can use the FTS5 search engine of SQLite. |
|
24
|
** The choice is an administrator configuration option. |
|
25
|
** |
|
26
|
** The first option is referred to as "full-scan search". The second |
|
27
|
** option is called "indexed search". |
|
28
|
** |
|
29
|
** The code in this file is ordered approximately as follows: |
|
30
|
** |
|
31
|
** (1) The full-scan search engine |
|
32
|
** (2) The indexed search engine |
|
33
|
** (3) Higher level interfaces that use either (1) or (2) according |
|
34
|
** to the current search configuration settings |
|
35
|
*/ |
|
36
|
#include "config.h" |
|
37
|
#include "search.h" |
|
38
|
#include <assert.h> |
|
39
|
|
|
40
|
#if INTERFACE

/* Upper bound on the number of terms in a full-scan search pattern */
#define SEARCH_MAX_TERM   8

/*
** The compiled form of a full-scan search pattern, together with the
** snippet-formatting controls and the outcome of the latest match attempt.
*/
struct Search {
  int nTerm;            /* Count of entries of a[] that are in use */
  struct srchTerm {     /* One entry per term of the pattern */
    char *z;              /* Start of the term text, inside zPattern */
    int n;                /* Number of bytes in the term */
  } a[SEARCH_MAX_TERM];
  /* Snippet controls */
  char *zPattern;       /* Private copy of the search pattern */
  char *zMarkBegin;     /* Text emitted ahead of each match */
  char *zMarkEnd;       /* Text emitted after each match */
  char *zMarkGap;       /* Text emitted where content is elided */
  unsigned fSrchFlg;    /* Mask of SRCHFLG_* values */
  int iScore;           /* Score from the most recent match attempt */
  Blob snip;            /* Snippet from the most recent match */
};

#define SRCHFLG_HTML    0x01  /* Escape snippet text for HTML */
#define SRCHFLG_STATIC  0x04  /* The static gSearch object */

#endif
|
68
|
|
|
69
|
/* |
|
70
|
** There is a single global Search object: |
|
71
|
*/ |
|
72
|
static Search gSearch;  /* Used by search_init() whenever SRCHFLG_STATIC is set */
|
73
|
|
|
74
|
|
|
75
|
/*
** Lookup table indexed by byte value.  An entry of 1 marks a byte that
** is a word boundary.  An entry of 0 marks a byte that is part of a word.
** The word bytes are the ASCII digits, the ASCII letters, '_', and every
** byte in the range 0x80..0xff.
*/
static const char isBoundary[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
  /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/* True if byte x is part of a word.  Only the low 8 bits of x are used. */
#define ISALNUM(x)  (!isBoundary[(x)&0xff])
|
97
|
|
|
98
|
|
|
99
|
/* |
|
100
|
** Destroy a full-scan search context. |
|
101
|
*/ |
|
102
|
void search_end(Search *p){ |
|
103
|
if( p ){ |
|
104
|
fossil_free(p->zPattern); |
|
105
|
fossil_free(p->zMarkBegin); |
|
106
|
fossil_free(p->zMarkEnd); |
|
107
|
fossil_free(p->zMarkGap); |
|
108
|
if( p->iScore ) blob_reset(&p->snip); |
|
109
|
memset(p, 0, sizeof(*p)); |
|
110
|
if( p!=&gSearch ) fossil_free(p); |
|
111
|
} |
|
112
|
} |
|
113
|
|
|
114
|
/* |
|
115
|
** Compile a full-scan search pattern |
|
116
|
*/ |
|
117
|
static Search *search_init( |
|
118
|
const char *zPattern, /* The search pattern */ |
|
119
|
const char *zMarkBegin, /* Start of a match */ |
|
120
|
const char *zMarkEnd, /* End of a match */ |
|
121
|
const char *zMarkGap, /* A gap between two matches */ |
|
122
|
unsigned fSrchFlg /* Flags */ |
|
123
|
){ |
|
124
|
Search *p; |
|
125
|
char *z; |
|
126
|
int i; |
|
127
|
|
|
128
|
if( fSrchFlg & SRCHFLG_STATIC ){ |
|
129
|
p = &gSearch; |
|
130
|
search_end(p); |
|
131
|
}else{ |
|
132
|
p = fossil_malloc(sizeof(*p)); |
|
133
|
memset(p, 0, sizeof(*p)); |
|
134
|
} |
|
135
|
p->zPattern = z = mprintf("%s",zPattern); |
|
136
|
p->zMarkBegin = mprintf("%s",zMarkBegin); |
|
137
|
p->zMarkEnd = mprintf("%s",zMarkEnd); |
|
138
|
p->zMarkGap = mprintf("%s",zMarkGap); |
|
139
|
p->fSrchFlg = fSrchFlg; |
|
140
|
blob_init(&p->snip, 0, 0); |
|
141
|
while( *z && p->nTerm<SEARCH_MAX_TERM ){ |
|
142
|
while( *z && !ISALNUM(*z) ){ z++; } |
|
143
|
if( *z==0 ) break; |
|
144
|
p->a[p->nTerm].z = z; |
|
145
|
for(i=1; ISALNUM(z[i]); i++){} |
|
146
|
p->a[p->nTerm].n = i; |
|
147
|
z += i; |
|
148
|
p->nTerm++; |
|
149
|
} |
|
150
|
return p; |
|
151
|
} |
|
152
|
|
|
153
|
|
|
154
|
/* |
|
155
|
** Append n bytes of text to snippet zTxt. Encode the text appropriately. |
|
156
|
*/ |
|
157
|
static void snippet_text_append( |
|
158
|
Search *p, /* The search context */ |
|
159
|
Blob *pSnip, /* Append to this snippet */ |
|
160
|
const char *zTxt, /* Text to append */ |
|
161
|
int n /* How many bytes to append */ |
|
162
|
){ |
|
163
|
if( n>0 ){ |
|
164
|
if( p->fSrchFlg & SRCHFLG_HTML ){ |
|
165
|
blob_appendf(pSnip, "%#h", n, zTxt); |
|
166
|
}else{ |
|
167
|
blob_append(pSnip, zTxt, n); |
|
168
|
} |
|
169
|
} |
|
170
|
} |
|
171
|
|
|
172
|
/* This the core search engine for full-scan search. |
|
173
|
** |
|
174
|
** Compare a search pattern against one or more input strings which |
|
175
|
** collectively comprise a document. Return a match score. Any |
|
176
|
** postive value means there was a match. Zero means that one or |
|
177
|
** more terms are missing. |
|
178
|
** |
|
179
|
** The score and a snippet are record for future use. |
|
180
|
** |
|
181
|
** Scoring: |
|
182
|
** * All terms must match at least once or the score is zero |
|
183
|
** * One point for each matching term |
|
184
|
** * Extra points if consecutive words of the pattern are consecutive |
|
185
|
** in the document |
|
186
|
*/ |
|
187
|
static int search_match( |
|
188
|
Search *p, /* Search pattern and flags */ |
|
189
|
int nDoc, /* Number of strings in this document */ |
|
190
|
const char **azDoc /* Text of each string */ |
|
191
|
){ |
|
192
|
int score; /* Final score */ |
|
193
|
int i; /* Offset into current document */ |
|
194
|
int ii; /* Loop counter */ |
|
195
|
int j; /* Loop over search terms */ |
|
196
|
int k; /* Loop over prior terms */ |
|
197
|
int iWord = 0; /* Current word number */ |
|
198
|
int iDoc; /* Current document number */ |
|
199
|
int wantGap = 0; /* True if a zMarkGap is wanted */ |
|
200
|
const char *zDoc; /* Current document text */ |
|
201
|
const int CTX = 50; /* Amount of snippet context */ |
|
202
|
int anMatch[SEARCH_MAX_TERM]; /* Number of terms in best match */ |
|
203
|
int aiBestDoc[SEARCH_MAX_TERM]; /* Document containing best match */ |
|
204
|
int aiBestOfst[SEARCH_MAX_TERM]; /* Byte offset to start of best match */ |
|
205
|
int aiLastDoc[SEARCH_MAX_TERM]; /* Document containing most recent match */ |
|
206
|
int aiLastOfst[SEARCH_MAX_TERM]; /* Byte offset to the most recent match */ |
|
207
|
int aiWordIdx[SEARCH_MAX_TERM]; /* Word index of most recent match */ |
|
208
|
|
|
209
|
memset(anMatch, 0, sizeof(anMatch)); |
|
210
|
memset(aiWordIdx, 0xff, sizeof(aiWordIdx)); |
|
211
|
for(iDoc=0; iDoc<nDoc; iDoc++){ |
|
212
|
zDoc = azDoc[iDoc]; |
|
213
|
if( zDoc==0 ) continue; |
|
214
|
iWord++; |
|
215
|
for(i=0; zDoc[i]; i++){ |
|
216
|
if( !ISALNUM(zDoc[i]) ) continue; |
|
217
|
iWord++; |
|
218
|
for(j=0; j<p->nTerm; j++){ |
|
219
|
int n = p->a[j].n; |
|
220
|
if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 |
|
221
|
&& (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*') |
|
222
|
){ |
|
223
|
aiWordIdx[j] = iWord; |
|
224
|
aiLastDoc[j] = iDoc; |
|
225
|
aiLastOfst[j] = i; |
|
226
|
for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){} |
|
227
|
for(ii=0; ii<k; ii++){ |
|
228
|
if( anMatch[j-ii]<k ){ |
|
229
|
anMatch[j-ii] = k*(nDoc-iDoc); |
|
230
|
aiBestDoc[j-ii] = aiLastDoc[j-ii]; |
|
231
|
aiBestOfst[j-ii] = aiLastOfst[j-ii]; |
|
232
|
} |
|
233
|
} |
|
234
|
break; |
|
235
|
} |
|
236
|
} |
|
237
|
while( ISALNUM(zDoc[i]) ){ i++; } |
|
238
|
if( zDoc[i]==0 ) break; |
|
239
|
} |
|
240
|
} |
|
241
|
|
|
242
|
/* Finished search all documents. |
|
243
|
** Every term must be seen or else the score is zero |
|
244
|
*/ |
|
245
|
score = 1; |
|
246
|
for(j=0; j<p->nTerm; j++) score *= anMatch[j]; |
|
247
|
blob_reset(&p->snip); |
|
248
|
p->iScore = score; |
|
249
|
if( score==0 ) return score; |
|
250
|
|
|
251
|
|
|
252
|
/* Prepare a snippet that describes the matching text. |
|
253
|
*/ |
|
254
|
while(1){ |
|
255
|
int iOfst; |
|
256
|
int iTail; |
|
257
|
int iBest; |
|
258
|
for(ii=0; ii<p->nTerm && anMatch[ii]==0; ii++){} |
|
259
|
if( ii>=p->nTerm ) break; /* This is where the loop exits */ |
|
260
|
iBest = ii; |
|
261
|
iDoc = aiBestDoc[ii]; |
|
262
|
iOfst = aiBestOfst[ii]; |
|
263
|
for(; ii<p->nTerm; ii++){ |
|
264
|
if( anMatch[ii]==0 ) continue; |
|
265
|
if( aiBestDoc[ii]>iDoc ) continue; |
|
266
|
if( aiBestOfst[ii]>iOfst ) continue; |
|
267
|
iDoc = aiBestDoc[ii]; |
|
268
|
iOfst = aiBestOfst[ii]; |
|
269
|
iBest = ii; |
|
270
|
} |
|
271
|
iTail = iOfst + p->a[iBest].n; |
|
272
|
anMatch[iBest] = 0; |
|
273
|
for(ii=0; ii<p->nTerm; ii++){ |
|
274
|
if( anMatch[ii]==0 ) continue; |
|
275
|
if( aiBestDoc[ii]!=iDoc ) continue; |
|
276
|
if( aiBestOfst[ii]<=iTail+CTX*2 ){ |
|
277
|
if( iTail<aiBestOfst[ii]+p->a[ii].n ){ |
|
278
|
iTail = aiBestOfst[ii]+p->a[ii].n; |
|
279
|
} |
|
280
|
anMatch[ii] = 0; |
|
281
|
ii = -1; |
|
282
|
continue; |
|
283
|
} |
|
284
|
} |
|
285
|
zDoc = azDoc[iDoc]; |
|
286
|
iOfst -= CTX; |
|
287
|
if( iOfst<0 ) iOfst = 0; |
|
288
|
while( iOfst>0 && ISALNUM(zDoc[iOfst-1]) ) iOfst--; |
|
289
|
while( zDoc[iOfst] && !ISALNUM(zDoc[iOfst]) ) iOfst++; |
|
290
|
for(ii=0; ii<CTX && zDoc[iTail]; ii++, iTail++){} |
|
291
|
while( ISALNUM(zDoc[iTail]) ) iTail++; |
|
292
|
if( iOfst>0 || wantGap ) blob_append(&p->snip, p->zMarkGap, -1); |
|
293
|
wantGap = zDoc[iTail]!=0; |
|
294
|
zDoc += iOfst; |
|
295
|
iTail -= iOfst; |
|
296
|
|
|
297
|
/* Add a snippet segment using characters iOfst..iOfst+iTail from zDoc */ |
|
298
|
for(i=0; i<iTail; i++){ |
|
299
|
if( !ISALNUM(zDoc[i]) ) continue; |
|
300
|
for(j=0; j<p->nTerm; j++){ |
|
301
|
int n = p->a[j].n; |
|
302
|
if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 |
|
303
|
&& (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*') |
|
304
|
){ |
|
305
|
snippet_text_append(p, &p->snip, zDoc, i); |
|
306
|
zDoc += i; |
|
307
|
iTail -= i; |
|
308
|
blob_append(&p->snip, p->zMarkBegin, -1); |
|
309
|
if( p->a[j].z[n]=='*' ){ |
|
310
|
while( ISALNUM(zDoc[n]) ) n++; |
|
311
|
} |
|
312
|
snippet_text_append(p, &p->snip, zDoc, n); |
|
313
|
zDoc += n; |
|
314
|
iTail -= n; |
|
315
|
blob_append(&p->snip, p->zMarkEnd, -1); |
|
316
|
i = -1; |
|
317
|
break; |
|
318
|
} /* end-if */ |
|
319
|
} /* end for(j) */ |
|
320
|
if( j<p->nTerm ){ |
|
321
|
while( ISALNUM(zDoc[i]) && i<iTail ){ i++; } |
|
322
|
} |
|
323
|
} /* end for(i) */ |
|
324
|
snippet_text_append(p, &p->snip, zDoc, iTail); |
|
325
|
} |
|
326
|
if( wantGap ) blob_append(&p->snip, p->zMarkGap, -1); |
|
327
|
return score; |
|
328
|
} |
|
329
|
|
|
330
|
/* |
|
331
|
** COMMAND: test-match |
|
332
|
** |
|
333
|
** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ... |
|
334
|
** |
|
335
|
** Run the full-scan search algorithm using SEARCHSTRING against |
|
336
|
** the text of the files listed. Output matches and snippets. |
|
337
|
** |
|
338
|
** Options: |
|
339
|
** --begin TEXT Text to insert before each match |
|
340
|
** --end TEXT Text to insert after each match |
|
341
|
** --gap TEXT Text to indicate elided content |
|
342
|
** --html Input is HTML |
|
343
|
** --static Use the static Search object |
|
344
|
*/ |
|
345
|
void test_match_cmd(void){ |
|
346
|
Search *p; |
|
347
|
int i; |
|
348
|
Blob x; |
|
349
|
int score; |
|
350
|
char *zDoc; |
|
351
|
int flg = 0; |
|
352
|
char *zBegin = (char*)find_option("begin",0,1); |
|
353
|
char *zEnd = (char*)find_option("end",0,1); |
|
354
|
char *zGap = (char*)find_option("gap",0,1); |
|
355
|
if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML; |
|
356
|
if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC; |
|
357
|
verify_all_options(); |
|
358
|
if( g.argc<4 ) usage("SEARCHSTRING FILE1..."); |
|
359
|
if( zBegin==0 ) zBegin = "[["; |
|
360
|
if( zEnd==0 ) zEnd = "]]"; |
|
361
|
if( zGap==0 ) zGap = " ... "; |
|
362
|
p = search_init(g.argv[2], zBegin, zEnd, zGap, flg); |
|
363
|
for(i=3; i<g.argc; i++){ |
|
364
|
blob_read_from_file(&x, g.argv[i], ExtFILE); |
|
365
|
zDoc = blob_str(&x); |
|
366
|
score = search_match(p, 1, (const char**)&zDoc); |
|
367
|
fossil_print("%s: %d\n", g.argv[i], p->iScore); |
|
368
|
blob_reset(&x); |
|
369
|
if( score ){ |
|
370
|
fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&p->snip), '='); |
|
371
|
} |
|
372
|
} |
|
373
|
search_end(p); |
|
374
|
} |
|
375
|
|
|
376
|
/* |
|
377
|
** An SQL function to initialize the full-scan search pattern: |
|
378
|
** |
|
379
|
** search_init(PATTERN,BEGIN,END,GAP,FLAGS) |
|
380
|
** |
|
381
|
** All arguments are optional. PATTERN is the search pattern. If it |
|
382
|
** is omitted, then the global search pattern is reset. BEGIN and END |
|
383
|
** and GAP are the strings used to construct snippets. FLAGS is an |
|
384
|
** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC, |
|
385
|
** SRCH_TKT, SRCH_FORUM, or SRCH_ALL bits to determine what is to be |
|
386
|
** searched. |
|
387
|
*/ |
|
388
|
static void search_init_sqlfunc( |
|
389
|
sqlite3_context *context, |
|
390
|
int argc, |
|
391
|
sqlite3_value **argv |
|
392
|
){ |
|
393
|
const char *zPattern = 0; |
|
394
|
const char *zBegin = "<mark>"; |
|
395
|
const char *zEnd = "</mark>"; |
|
396
|
const char *zGap = " ... "; |
|
397
|
unsigned int flg = SRCHFLG_HTML; |
|
398
|
switch( argc ){ |
|
399
|
default: |
|
400
|
flg = (unsigned int)sqlite3_value_int(argv[4]); |
|
401
|
case 4: |
|
402
|
zGap = (const char*)sqlite3_value_text(argv[3]); |
|
403
|
case 3: |
|
404
|
zEnd = (const char*)sqlite3_value_text(argv[2]); |
|
405
|
case 2: |
|
406
|
zBegin = (const char*)sqlite3_value_text(argv[1]); |
|
407
|
case 1: |
|
408
|
zPattern = (const char*)sqlite3_value_text(argv[0]); |
|
409
|
} |
|
410
|
if( zPattern && zPattern[0] ){ |
|
411
|
search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC); |
|
412
|
}else{ |
|
413
|
search_end(&gSearch); |
|
414
|
} |
|
415
|
} |
|
416
|
|
|
417
|
/* search_match(TEXT, TEXT, ....) |
|
418
|
** |
|
419
|
** Using the full-scan search engine created by the most recent call |
|
420
|
** to search_init(), match the input the TEXT arguments. |
|
421
|
** Remember the results in the global full-scan search object. |
|
422
|
** Return non-zero on a match and zero on a miss. |
|
423
|
*/ |
|
424
|
static void search_match_sqlfunc( |
|
425
|
sqlite3_context *context, |
|
426
|
int argc, |
|
427
|
sqlite3_value **argv |
|
428
|
){ |
|
429
|
const char *azDoc[5]; |
|
430
|
int nDoc; |
|
431
|
int rc; |
|
432
|
for(nDoc=0; nDoc<count(azDoc) && nDoc<argc; nDoc++){ |
|
433
|
azDoc[nDoc] = (const char*)sqlite3_value_text(argv[nDoc]); |
|
434
|
if( azDoc[nDoc]==0 ) azDoc[nDoc] = ""; |
|
435
|
} |
|
436
|
rc = search_match(&gSearch, nDoc, azDoc); |
|
437
|
sqlite3_result_int(context, rc); |
|
438
|
} |
|
439
|
|
|
440
|
|
|
441
|
/* search_score() |
|
442
|
** |
|
443
|
** Return the match score for the last successful search_match call. |
|
444
|
*/ |
|
445
|
static void search_score_sqlfunc( |
|
446
|
sqlite3_context *context, |
|
447
|
int argc, |
|
448
|
sqlite3_value **argv |
|
449
|
){ |
|
450
|
sqlite3_result_int(context, gSearch.iScore); |
|
451
|
} |
|
452
|
|
|
453
|
/* search_snippet() |
|
454
|
** |
|
455
|
** Return a snippet for the last successful search_match() call. |
|
456
|
*/ |
|
457
|
static void search_snippet_sqlfunc( |
|
458
|
sqlite3_context *context, |
|
459
|
int argc, |
|
460
|
sqlite3_value **argv |
|
461
|
){ |
|
462
|
if( blob_size(&gSearch.snip)>0 ){ |
|
463
|
sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free); |
|
464
|
blob_init(&gSearch.snip, 0, 0); |
|
465
|
} |
|
466
|
} |
|
467
|
|
|
468
|
/* stext(TYPE, RID, ARG) |
|
469
|
** |
|
470
|
** This is an SQLite function that computes the searchable text. |
|
471
|
** It is a wrapper around the search_stext() routine. See the |
|
472
|
** search_stext() routine for further detail. |
|
473
|
*/ |
|
474
|
static void search_stext_sqlfunc( |
|
475
|
sqlite3_context *context, |
|
476
|
int argc, |
|
477
|
sqlite3_value **argv |
|
478
|
){ |
|
479
|
const char *zType = (const char*)sqlite3_value_text(argv[0]); |
|
480
|
int rid = sqlite3_value_int(argv[1]); |
|
481
|
const char *zName = (const char*)sqlite3_value_text(argv[2]); |
|
482
|
sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1, |
|
483
|
SQLITE_TRANSIENT); |
|
484
|
} |
|
485
|
|
|
486
|
/* title(TYPE, RID, ARG) |
|
487
|
** |
|
488
|
** Return the title of the document to be search. |
|
489
|
*/ |
|
490
|
static void search_title_sqlfunc( |
|
491
|
sqlite3_context *context, |
|
492
|
int argc, |
|
493
|
sqlite3_value **argv |
|
494
|
){ |
|
495
|
const char *zType = (const char*)sqlite3_value_text(argv[0]); |
|
496
|
int rid = sqlite3_value_int(argv[1]); |
|
497
|
const char *zName = (const char*)sqlite3_value_text(argv[2]); |
|
498
|
int nHdr = 0; |
|
499
|
char *z = search_stext_cached(zType[0], rid, zName, &nHdr); |
|
500
|
if( nHdr || zType[0]!='d' ){ |
|
501
|
sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT); |
|
502
|
}else{ |
|
503
|
sqlite3_result_value(context, argv[2]); |
|
504
|
} |
|
505
|
} |
|
506
|
|
|
507
|
/* body(TYPE, RID, ARG) |
|
508
|
** |
|
509
|
** Return the body of the document to be search. |
|
510
|
*/ |
|
511
|
static void search_body_sqlfunc( |
|
512
|
sqlite3_context *context, |
|
513
|
int argc, |
|
514
|
sqlite3_value **argv |
|
515
|
){ |
|
516
|
const char *zType = (const char*)sqlite3_value_text(argv[0]); |
|
517
|
int rid = sqlite3_value_int(argv[1]); |
|
518
|
const char *zName = (const char*)sqlite3_value_text(argv[2]); |
|
519
|
int nHdr = 0; |
|
520
|
char *z = search_stext_cached(zType[0], rid, zName, &nHdr); |
|
521
|
sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT); |
|
522
|
} |
|
523
|
|
|
524
|
/* urlencode(X) |
|
525
|
** |
|
526
|
** Encode a string for use as a query parameter in a URL. This is |
|
527
|
** the equivalent of printf("%T",X). |
|
528
|
*/ |
|
529
|
static void search_urlencode_sqlfunc( |
|
530
|
sqlite3_context *context, |
|
531
|
int argc, |
|
532
|
sqlite3_value **argv |
|
533
|
){ |
|
534
|
char *z = mprintf("%T",sqlite3_value_text(argv[0])); |
|
535
|
sqlite3_result_text(context, z, -1, fossil_free); |
|
536
|
} |
|
537
|
|
|
538
|
/* |
|
539
|
** Register the various SQL functions (defined above) needed to implement |
|
540
|
** full-scan search. |
|
541
|
*/ |
|
542
|
void search_sql_setup(sqlite3 *db){ |
|
543
|
static int once = 0; |
|
544
|
static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS; |
|
545
|
if( once++ ) return; |
|
546
|
sqlite3_create_function(db, "search_match", -1, enc, 0, |
|
547
|
search_match_sqlfunc, 0, 0); |
|
548
|
sqlite3_create_function(db, "search_score", 0, enc, 0, |
|
549
|
search_score_sqlfunc, 0, 0); |
|
550
|
sqlite3_create_function(db, "search_snippet", 0, enc, 0, |
|
551
|
search_snippet_sqlfunc, 0, 0); |
|
552
|
sqlite3_create_function(db, "search_init", -1, enc, 0, |
|
553
|
search_init_sqlfunc, 0, 0); |
|
554
|
sqlite3_create_function(db, "stext", 3, enc, 0, |
|
555
|
search_stext_sqlfunc, 0, 0); |
|
556
|
sqlite3_create_function(db, "title", 3, enc, 0, |
|
557
|
search_title_sqlfunc, 0, 0); |
|
558
|
sqlite3_create_function(db, "body", 3, enc, 0, |
|
559
|
search_body_sqlfunc, 0, 0); |
|
560
|
sqlite3_create_function(db, "urlencode", 1, enc, 0, |
|
561
|
search_urlencode_sqlfunc, 0, 0); |
|
562
|
} |
|
563
|
|
|
564
|
/* |
|
565
|
** Testing the search function. |
|
566
|
** |
|
567
|
** COMMAND: search* |
|
568
|
** |
|
569
|
** Usage: %fossil search [OPTIONS] PATTERN... |
|
570
|
** |
|
571
|
** Search the repository for PATTERN and show matches. Depending on |
|
572
|
** options and how the administrator has search configured for the |
|
573
|
** repository, the search can cover: |
|
574
|
** |
|
575
|
** * check-in comments (-c) |
|
576
|
** * embedded documentation (--docs) |
|
577
|
** * forum posts (--forum) |
|
578
|
** * tickets (--tickets) |
|
579
|
** * tech notes (--technotes) |
|
580
|
** * wiki pages (--wiki) |
|
581
|
** * built-in fossil help text (-h) |
|
582
|
** * all of the above (-a) |
|
583
|
** |
|
584
|
** Use options below to select the scope of the search. The |
|
585
|
** default is check-in comments only (-c). |
|
586
|
** |
|
587
|
** Output is colorized if writing to a TTY and if the NO_COLOR environment
|
588
|
** variable is not set. Use the "--highlight 0" option to disable colorization |
|
589
|
** or use "--highlight 91" to force it on. Change the argument to --highlight |
|
590
|
** to change the color.  The default color is "1".
|
592
|
** |
|
593
|
** Options: |
|
594
|
** -a|--all Search everything |
|
595
|
** -c|--checkins Search check-in comments |
|
596
|
** --docs Search embedded documentation |
|
597
|
** --forum Search forum posts |
|
598
|
** --highlight N Use VT100 color N for matching text. 0 means "off".
|
599
|
** -n|--limit N Limit output to N matches |
|
600
|
** --technotes Search tech notes |
|
601
|
** --tickets Search tickets |
|
602
|
** -W|--width WIDTH Set display width to WIDTH columns, 0 for |
|
603
|
** unlimited. Defaults to the terminal's width. |
|
604
|
** --wiki Search wiki |
|
605
|
*/ |
|
606
|
void search_cmd(void){ |
|
607
|
Blob pattern; |
|
608
|
int i; |
|
609
|
Blob sql = empty_blob; |
|
610
|
Stmt q; |
|
611
|
int iBest; |
|
612
|
int srchFlags = 0; |
|
613
|
int bFts = 1; /* Use FTS search by default now */ |
|
614
|
char fAll = NULL != find_option("all", "a", 0); |
|
615
|
const char *zLimit = find_option("limit","n",1); |
|
616
|
const char *zScope = 0; |
|
617
|
const char *zWidth = find_option("width","W",1); |
|
618
|
int bDebug = find_option("debug",0,0)!=0; /* Undocumented */ |
|
619
|
int nLimit = zLimit ? atoi(zLimit) : -1000; |
|
620
|
int width; |
|
621
|
int nTty = 0; /* VT100 highlight color for matching text */ |
|
622
|
const char *zHighlight = 0; |
|
623
|
int bFlags = 0; /* DB open flags */ |
|
624
|
|
|
625
|
nTty = terminal_is_vt100(); |
|
626
|
|
|
627
|
/* Undocumented option to change highlight color */ |
|
628
|
zHighlight = find_option("highlight",0,1); |
|
629
|
if( zHighlight ) nTty = atoi(zHighlight); |
|
630
|
|
|
631
|
/* Undocumented option (legacy) */ |
|
632
|
zScope = find_option("scope",0,1); |
|
633
|
|
|
634
|
if( find_option("fts",0,0)!=0 ) bFts = 1; /* Undocumented legacy */ |
|
635
|
if( find_option("legacy",0,0)!=0 ) bFts = 0; /* Undocumented */ |
|
636
|
|
|
637
|
if( zWidth ){ |
|
638
|
width = atoi(zWidth); |
|
639
|
if( (width!=0) && (width<=20) ){ |
|
640
|
fossil_fatal("-W|--width value must be >20 or 0"); |
|
641
|
} |
|
642
|
}else{ |
|
643
|
width = -1; |
|
644
|
} |
|
645
|
if( zScope ){ |
|
646
|
for(i=0; zScope[i]; i++){ |
|
647
|
switch( zScope[i] ){ |
|
648
|
case 'a': srchFlags = SRCH_ALL; break; |
|
649
|
case 'c': srchFlags |= SRCH_CKIN; break; |
|
650
|
case 'd': srchFlags |= SRCH_DOC; break; |
|
651
|
case 'e': srchFlags |= SRCH_TECHNOTE; break; |
|
652
|
case 'f': srchFlags |= SRCH_FORUM; break; |
|
653
|
case 'h': srchFlags |= SRCH_HELP; break; |
|
654
|
case 't': srchFlags |= SRCH_TKT; break; |
|
655
|
case 'w': srchFlags |= SRCH_WIKI; break; |
|
656
|
} |
|
657
|
} |
|
658
|
bFts = 1; |
|
659
|
} |
|
660
|
if( find_option("all","a",0) ){ srchFlags |= SRCH_ALL; bFts = 1; } |
|
661
|
if( find_option("bi-help","h",0) ){ srchFlags |= SRCH_HELP; bFts = 1; } |
|
662
|
if( find_option("checkins","c",0) ){ srchFlags |= SRCH_CKIN; bFts = 1; } |
|
663
|
if( find_option("docs",0,0) ){ srchFlags |= SRCH_DOC; bFts = 1; } |
|
664
|
if( find_option("forum",0,0) ){ srchFlags |= SRCH_FORUM; bFts = 1; } |
|
665
|
if( find_option("technotes",0,0) ){ srchFlags |= SRCH_TECHNOTE; bFts = 1; } |
|
666
|
if( find_option("tickets",0,0) ){ srchFlags |= SRCH_TKT; bFts = 1; } |
|
667
|
if( find_option("wiki",0,0) ){ srchFlags |= SRCH_WIKI; bFts = 1; } |
|
668
|
|
|
669
|
/* If no search objects are specified, default to "check-in comments" */ |
|
670
|
if( srchFlags==0 ) srchFlags = SRCH_CKIN; |
|
671
|
|
|
672
|
if( srchFlags==SRCH_HELP ) bFlags = OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE; |
|
673
|
db_find_and_open_repository(bFlags, 0); |
|
674
|
verify_all_options(); |
|
675
|
if( g.argc<3 ) return; |
|
676
|
login_set_capabilities("s", 0); |
|
677
|
if( search_restrict(srchFlags)==0 && (srchFlags & SRCH_HELP)==0 ){ |
|
678
|
const char *zC1 = 0, *zPlural = "s"; |
|
679
|
if( srchFlags & SRCH_TECHNOTE ){ zC1 = "technote"; } |
|
680
|
if( srchFlags & SRCH_TKT ){ zC1 = "ticket"; } |
|
681
|
if( srchFlags & SRCH_FORUM ){ zC1 = "forum"; zPlural = ""; } |
|
682
|
if( srchFlags & SRCH_DOC ){ zC1 = "document"; } |
|
683
|
if( srchFlags & SRCH_WIKI ){ zC1 = "wiki"; zPlural = ""; } |
|
684
|
if( srchFlags & SRCH_CKIN ){ zC1 = "check-in"; } |
|
685
|
fossil_print( |
|
686
|
"Search of %s%s is disabled on this repository.\n" |
|
687
|
"Enable using \"fossil fts-config enable %s\".\n", |
|
688
|
zC1, zPlural, zC1 |
|
689
|
); |
|
690
|
return; |
|
691
|
} |
|
692
|
|
|
693
|
blob_init(&pattern, g.argv[2], -1); |
|
694
|
for(i=3; i<g.argc; i++){ |
|
695
|
blob_appendf(&pattern, " %s", g.argv[i]); |
|
696
|
} |
|
697
|
if( bFts ){ |
|
698
|
/* Search using FTS */ |
|
699
|
Blob com; |
|
700
|
Blob snip; |
|
701
|
const char *zPattern = blob_str(&pattern); |
|
702
|
search_sql_setup(g.db); |
|
703
|
add_content_sql_commands(g.db); |
|
704
|
db_multi_exec( |
|
705
|
"CREATE TEMP TABLE x(label,url,score,id,date,snip);" |
|
706
|
); |
|
707
|
if( !search_index_exists() ){ |
|
708
|
search_fullscan(zPattern, srchFlags); /* Full-scan search */ |
|
709
|
}else{ |
|
710
|
search_update_index(srchFlags); /* Update the index */ |
|
711
|
search_indexed(zPattern, srchFlags); /* Indexed search */ |
|
712
|
if( srchFlags & SRCH_HELP ){ |
|
713
|
search_fullscan(zPattern, SRCH_HELP); |
|
714
|
} |
|
715
|
} |
|
716
|
db_prepare(&q, "SELECT snip, label, score, id, date" |
|
717
|
" FROM x" |
|
718
|
" ORDER BY score DESC, date DESC;"); |
|
719
|
blob_init(&com, 0, 0); |
|
720
|
blob_init(&snip, 0, 0); |
|
721
|
if( width<0 ) width = terminal_get_width(80); |
|
722
|
while( db_step(&q)==SQLITE_ROW ){ |
|
723
|
const char *zSnippet = db_column_text(&q, 0); |
|
724
|
const char *zLabel = db_column_text(&q, 1); |
|
725
|
const char *zDate = db_column_text(&q, 4); |
|
726
|
const char *zScore = db_column_text(&q, 2); |
|
727
|
const char *zId = db_column_text(&q, 3); |
|
728
|
char *zOrig; |
|
729
|
blob_appendf(&snip, "%s", zSnippet); |
|
730
|
zOrig = blob_materialize(&snip); |
|
731
|
blob_init(&snip, 0, 0); |
|
732
|
html_to_plaintext(zOrig, &snip, (nTty?HTOT_VT100:0)|HTOT_FLOW|HTOT_TRIM); |
|
733
|
fossil_free(zOrig); |
|
734
|
blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate); |
|
735
|
if( bDebug ){ |
|
736
|
blob_appendf(&com," score: %s id: %s", zScore, zId); |
|
737
|
} |
|
738
|
comment_print(blob_str(&com), 0, 5, width, |
|
739
|
COMMENT_PRINT_TRIM_CRLF | |
|
740
|
COMMENT_PRINT_WORD_BREAK | |
|
741
|
COMMENT_PRINT_TRIM_SPACE); |
|
742
|
blob_reset(&com); |
|
743
|
blob_reset(&snip); |
|
744
|
if( nLimit>=1 ){ |
|
745
|
nLimit--; |
|
746
|
if( nLimit==0 ) break; |
|
747
|
} |
|
748
|
} |
|
749
|
db_finalize(&q); |
|
750
|
blob_reset(&pattern); |
|
751
|
}else{ |
|
752
|
/* Legacy timeline search (the default) */ |
|
753
|
(void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC); |
|
754
|
blob_reset(&pattern); |
|
755
|
search_sql_setup(g.db); |
|
756
|
|
|
757
|
db_multi_exec( |
|
758
|
"CREATE TEMP TABLE srch(rid,uuid,date,comment,x);" |
|
759
|
"CREATE INDEX srch_idx1 ON srch(x);" |
|
760
|
"INSERT INTO srch(rid,uuid,date,comment,x)" |
|
761
|
" SELECT blob.rid, uuid, datetime(event.mtime,toLocal())," |
|
762
|
" coalesce(ecomment,comment)," |
|
763
|
" search_score()" |
|
764
|
" FROM event, blob" |
|
765
|
" WHERE blob.rid=event.objid" |
|
766
|
" AND search_match(coalesce(ecomment,comment));" |
|
767
|
); |
|
768
|
iBest = db_int(0, "SELECT max(x) FROM srch"); |
|
769
|
blob_append(&sql, |
|
770
|
"SELECT rid, uuid, date, comment, 0, 0 FROM srch " |
|
771
|
"WHERE 1 ", -1); |
|
772
|
if(!fAll){ |
|
773
|
blob_append_sql(&sql,"AND x>%d ", iBest/3); |
|
774
|
} |
|
775
|
blob_append(&sql, "ORDER BY x DESC, date DESC ", -1); |
|
776
|
db_prepare(&q, "%s", blob_sql_text(&sql)); |
|
777
|
blob_reset(&sql); |
|
778
|
print_timeline(&q, nLimit, width, 0, 0); |
|
779
|
db_finalize(&q); |
|
780
|
} |
|
781
|
} |
|
782
|
|
|
783
|
#if INTERFACE |
|
784
|
/* Bit values that select which kinds of content are to be searched */
#define SRCH_CKIN     0x0001    /* Check-in comments */
#define SRCH_DOC      0x0002    /* Embedded documents */
#define SRCH_TKT      0x0004    /* Tickets */
#define SRCH_WIKI     0x0008    /* Wiki */
#define SRCH_TECHNOTE 0x0010    /* Tech notes */
#define SRCH_FORUM    0x0020    /* Forum messages */
#define SRCH_HELP     0x0040    /* Built-in help (full-scan only) */
#define SRCH_ALL      0x007f    /* All of the above */
|
793
|
#endif |
|
794
|
|
|
795
|
/* |
|
796
|
** Remove bits from srchFlags which are disallowed by either the |
|
797
|
** current server configuration or by user permissions. Return |
|
798
|
** the revised search flags mask. |
|
799
|
** |
|
800
|
** If bFlex is true, that means allow through the SRCH_HELP option |
|
801
|
** even if it is not explicitly enabled. |
|
802
|
*/ |
|
803
|
unsigned int search_restrict(unsigned int srchFlags){ |
|
804
|
static unsigned int knownGood = 0; |
|
805
|
static unsigned int knownBad = 0; |
|
806
|
static const struct { unsigned m; const char *zKey; } aSetng[] = { |
|
807
|
{ SRCH_CKIN, "search-ci" }, |
|
808
|
{ SRCH_DOC, "search-doc" }, |
|
809
|
{ SRCH_TKT, "search-tkt" }, |
|
810
|
{ SRCH_WIKI, "search-wiki" }, |
|
811
|
{ SRCH_TECHNOTE, "search-technote" }, |
|
812
|
{ SRCH_FORUM, "search-forum" }, |
|
813
|
{ SRCH_HELP, "search-help" }, |
|
814
|
}; |
|
815
|
int i; |
|
816
|
if( g.perm.Read==0 ) srchFlags &= ~(SRCH_CKIN|SRCH_DOC|SRCH_TECHNOTE); |
|
817
|
if( g.perm.RdTkt==0 ) srchFlags &= ~(SRCH_TKT); |
|
818
|
if( g.perm.RdWiki==0 ) srchFlags &= ~(SRCH_WIKI); |
|
819
|
if( g.perm.RdForum==0) srchFlags &= ~(SRCH_FORUM); |
|
820
|
for(i=0; i<count(aSetng); i++){ |
|
821
|
unsigned int m = aSetng[i].m; |
|
822
|
if( (srchFlags & m)==0 ) continue; |
|
823
|
if( ((knownGood|knownBad) & m)!=0 ) continue; |
|
824
|
if( db_get_boolean(aSetng[i].zKey,0) ){ |
|
825
|
knownGood |= m; |
|
826
|
}else{ |
|
827
|
knownBad |= m; |
|
828
|
} |
|
829
|
} |
|
830
|
return srchFlags & ~knownBad; |
|
831
|
} |
|
832
|
|
|
833
|
/* |
|
834
|
** When this routine is called, there already exists a table |
|
835
|
** |
|
836
|
** x(label,url,score,id,snip). |
|
837
|
** |
|
838
|
** label: The "name" of the document containing the match |
|
839
|
** url: A URL for the document |
|
840
|
** score: How well the document matched |
|
841
|
** id: The document id. Format: xNNNNN, x: type, N: number |
|
842
|
** snip: A snippet for the match |
|
843
|
** |
|
844
|
** And the srchFlags parameter has been validated. This routine |
|
845
|
** fills the X table with search results using a full-scan search. |
|
846
|
** |
|
847
|
** The companion indexed search routine is search_indexed(). |
|
848
|
*/ |
|
849
|
LOCAL void search_fullscan( |
|
850
|
const char *zPattern, /* The query pattern */ |
|
851
|
unsigned int srchFlags /* What to search over */ |
|
852
|
){ |
|
853
|
search_init(zPattern, "<mark>", "</mark>", " ... ", |
|
854
|
SRCHFLG_STATIC|SRCHFLG_HTML); |
|
855
|
if( (srchFlags & SRCH_DOC)!=0 ){ |
|
856
|
char *zDocGlob = db_get("doc-glob",""); |
|
857
|
const char *zMainBranch = db_main_branch(); |
|
858
|
char *zDocBr = db_get("doc-branch", zMainBranch); |
|
859
|
if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){ |
|
860
|
Glob * pGlob = glob_create(zDocBr) |
|
861
|
/* We're misusing a Glob as a list of comma-/space-delimited |
|
862
|
** tokens. We're not actually doing glob matches here. */; |
|
863
|
int i; |
|
864
|
db_multi_exec( |
|
865
|
"CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;" |
|
866
|
); |
|
867
|
for( i = 0; i < pGlob->nPattern; ++i ){ |
|
868
|
const char * zBranch = pGlob->azPattern[i]; |
|
869
|
db_multi_exec( |
|
870
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
871
|
" SELECT printf('Document: %%s',title('d',blob.rid,foci.filename))," |
|
872
|
" printf('/doc/%T/%%s',foci.filename)," |
|
873
|
" search_score()," |
|
874
|
" 'd'||blob.rid," |
|
875
|
" (SELECT datetime(event.mtime) FROM event" |
|
876
|
" WHERE objid=symbolic_name_to_rid(%Q))," |
|
877
|
" search_snippet()" |
|
878
|
" FROM foci CROSS JOIN blob" |
|
879
|
" WHERE checkinID=symbolic_name_to_rid(%Q)" |
|
880
|
" AND blob.uuid=foci.uuid" |
|
881
|
" AND search_match(title('d',blob.rid,foci.filename)," |
|
882
|
" body('d',blob.rid,foci.filename))" |
|
883
|
" AND %z", |
|
884
|
zBranch, zBranch, zBranch, glob_expr("foci.filename", zDocGlob) |
|
885
|
); |
|
886
|
} |
|
887
|
glob_free(pGlob); |
|
888
|
} |
|
889
|
fossil_free(zDocGlob); |
|
890
|
fossil_free(zDocBr); |
|
891
|
} |
|
892
|
if( (srchFlags & SRCH_WIKI)!=0 ){ |
|
893
|
db_multi_exec( |
|
894
|
"WITH wiki(name,rid,mtime) AS (" |
|
895
|
" SELECT substr(tagname,6), tagxref.rid, max(tagxref.mtime)" |
|
896
|
" FROM tag, tagxref" |
|
897
|
" WHERE tag.tagname GLOB 'wiki-*'" |
|
898
|
" AND tagxref.tagid=tag.tagid" |
|
899
|
" GROUP BY 1" |
|
900
|
")" |
|
901
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
902
|
" SELECT printf('Wiki: %%s',name)," |
|
903
|
" printf('/wiki?name=%%s',urlencode(name))," |
|
904
|
" search_score()," |
|
905
|
" 'w'||rid," |
|
906
|
" datetime(mtime)," |
|
907
|
" search_snippet()" |
|
908
|
" FROM wiki" |
|
909
|
" WHERE search_match(title('w',rid,name),body('w',rid,name));" |
|
910
|
); |
|
911
|
} |
|
912
|
if( (srchFlags & SRCH_CKIN)!=0 ){ |
|
913
|
db_multi_exec( |
|
914
|
"WITH ckin(uuid,rid,mtime) AS (" |
|
915
|
" SELECT blob.uuid, event.objid, event.mtime" |
|
916
|
" FROM event, blob" |
|
917
|
" WHERE event.type='ci'" |
|
918
|
" AND blob.rid=event.objid" |
|
919
|
")" |
|
920
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
921
|
" SELECT printf('Check-in [%%.10s] on %%s',uuid,datetime(mtime))," |
|
922
|
" printf('/timeline?c=%%s',uuid)," |
|
923
|
" search_score()," |
|
924
|
" 'c'||rid," |
|
925
|
" datetime(mtime)," |
|
926
|
" search_snippet()" |
|
927
|
" FROM ckin" |
|
928
|
" WHERE search_match('',body('c',rid,NULL));" |
|
929
|
); |
|
930
|
} |
|
931
|
if( (srchFlags & SRCH_TKT)!=0 ){ |
|
932
|
db_multi_exec( |
|
933
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
934
|
" SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,NULL)," |
|
935
|
"datetime(tkt_mtime))," |
|
936
|
" printf('/tktview/%%.20s',tkt_uuid)," |
|
937
|
" search_score()," |
|
938
|
" 't'||tkt_id," |
|
939
|
" datetime(tkt_mtime)," |
|
940
|
" search_snippet()" |
|
941
|
" FROM ticket" |
|
942
|
" WHERE search_match(title('t',tkt_id,NULL),body('t',tkt_id,NULL));" |
|
943
|
); |
|
944
|
} |
|
945
|
if( (srchFlags & SRCH_TECHNOTE)!=0 ){ |
|
946
|
db_multi_exec( |
|
947
|
"WITH technote(uuid,rid,mtime) AS (" |
|
948
|
" SELECT substr(tagname,7), tagxref.rid, max(tagxref.mtime)" |
|
949
|
" FROM tag, tagxref" |
|
950
|
" WHERE tag.tagname GLOB 'event-*'" |
|
951
|
" AND tagxref.tagid=tag.tagid" |
|
952
|
" GROUP BY 1" |
|
953
|
")" |
|
954
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
955
|
" SELECT printf('Tech Note: %%s',uuid)," |
|
956
|
" printf('/technote/%%s',uuid)," |
|
957
|
" search_score()," |
|
958
|
" 'e'||rid," |
|
959
|
" datetime(mtime)," |
|
960
|
" search_snippet()" |
|
961
|
" FROM technote" |
|
962
|
" WHERE search_match('',body('e',rid,NULL));" |
|
963
|
); |
|
964
|
} |
|
965
|
if( (srchFlags & SRCH_FORUM)!=0 ){ |
|
966
|
db_multi_exec( |
|
967
|
"INSERT INTO x(label,url,score,id,date,snip)" |
|
968
|
" SELECT 'Forum '||comment," |
|
969
|
" '/forumpost/'||uuid," |
|
970
|
" search_score()," |
|
971
|
" 'f'||rid," |
|
972
|
" datetime(event.mtime)," |
|
973
|
" search_snippet()" |
|
974
|
" FROM event JOIN blob on event.objid=blob.rid" |
|
975
|
" WHERE search_match('',body('f',rid,NULL));" |
|
976
|
); |
|
977
|
} |
|
978
|
if( (srchFlags & SRCH_HELP)!=0 ){ |
|
979
|
const char *zPrefix; |
|
980
|
helptext_vtab_register(g.db); |
|
981
|
if( srchFlags==SRCH_HELP ){ |
|
982
|
zPrefix = "The"; |
|
983
|
}else{ |
|
984
|
zPrefix = "Built-in help for the"; |
|
985
|
} |
|
986
|
db_multi_exec( |
|
987
|
"INSERT INTO x(label,url,score,id,snip)" |
|
988
|
" SELECT format('%q \"%%s\" %%s',name,type)," |
|
989
|
" '/help/'||name," |
|
990
|
" search_score()," |
|
991
|
" 'h'||rowid," |
|
992
|
" search_snippet()" |
|
993
|
" FROM helptext" |
|
994
|
" WHERE search_match(format('the \"%%s\" %%s',name,type)," |
|
995
|
" helptext.helptext);", |
|
996
|
zPrefix |
|
997
|
); |
|
998
|
} |
|
999
|
} |
|
1000
|
|
|
1001
|
/* |
|
1002
|
** Number of significant bits in a u32 |
|
1003
|
*/ |
|
1004
|
static int nbits(u32 x){ |
|
1005
|
int n = 0; |
|
1006
|
while( x ){ n++; x >>= 1; } |
|
1007
|
return n; |
|
1008
|
} |
|
1009
|
|
|
1010
|
/* |
|
1011
|
** Implemenation of the rank() function used with rank(matchinfo(*,'pcsx')). |
|
1012
|
*/ |
|
1013
|
static void search_rank_sqlfunc( |
|
1014
|
sqlite3_context *context, |
|
1015
|
int argc, |
|
1016
|
sqlite3_value **argv |
|
1017
|
){ |
|
1018
|
const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]); |
|
1019
|
int nVal = sqlite3_value_bytes(argv[0])/4; |
|
1020
|
int nCol; /* Number of columns in the index */ |
|
1021
|
int nTerm; /* Number of search terms in the query */ |
|
1022
|
int i, j; /* Loop counter */ |
|
1023
|
double r = 0.0; /* Score */ |
|
1024
|
const unsigned *aX, *aS; |
|
1025
|
|
|
1026
|
if( nVal<2 ) return; |
|
1027
|
nTerm = aVal[0]; |
|
1028
|
nCol = aVal[1]; |
|
1029
|
if( nVal<2+3*nCol*nTerm+nCol ) return; |
|
1030
|
aS = aVal+2; |
|
1031
|
aX = aS+nCol; |
|
1032
|
for(j=0; j<nCol; j++){ |
|
1033
|
double x; |
|
1034
|
if( aS[j]>0 ){ |
|
1035
|
x = 0.0; |
|
1036
|
for(i=0; i<nTerm; i++){ |
|
1037
|
int hits_this_row; |
|
1038
|
int hits_all_rows; |
|
1039
|
int rows_with_hit; |
|
1040
|
double avg_hits_per_row; |
|
1041
|
|
|
1042
|
hits_this_row = aX[j + i*nCol*3]; |
|
1043
|
if( hits_this_row==0 )continue; |
|
1044
|
hits_all_rows = aX[j + i*nCol*3 + 1]; |
|
1045
|
rows_with_hit = aX[j + i*nCol*3 + 2]; |
|
1046
|
if( rows_with_hit==0 ) continue; |
|
1047
|
avg_hits_per_row = hits_all_rows/(double)rows_with_hit; |
|
1048
|
x += hits_this_row/(avg_hits_per_row*nbits(rows_with_hit)); |
|
1049
|
} |
|
1050
|
x *= (1<<((30*(aS[j]-1))/nTerm)); |
|
1051
|
}else{ |
|
1052
|
x = 0.0; |
|
1053
|
} |
|
1054
|
r = r*10.0 + x; |
|
1055
|
} |
|
1056
|
#define SEARCH_DEBUG_RANK 0 |
|
1057
|
#if SEARCH_DEBUG_RANK |
|
1058
|
{ |
|
1059
|
Blob x; |
|
1060
|
blob_init(&x,0,0); |
|
1061
|
blob_appendf(&x,"%08x", (int)r); |
|
1062
|
for(i=0; i<nVal; i++){ |
|
1063
|
blob_appendf(&x," %d", aVal[i]); |
|
1064
|
} |
|
1065
|
blob_appendf(&x," r=%g", r); |
|
1066
|
sqlite3_result_text(context, blob_str(&x), -1, fossil_free); |
|
1067
|
} |
|
1068
|
#else |
|
1069
|
sqlite3_result_double(context, r); |
|
1070
|
#endif |
|
1071
|
} |
|
1072
|
|
|
1073
|
/*
** Return a simplified copy of the search pattern zPattern.
**
** In the copy, every byte that has its high bit clear and is not
** alphanumeric becomes a space, and upper-case ASCII letters are folded
** to lower case.  Bytes with the high bit set are copied unchanged.
** The intent is to avoid errors in FTS5 searches on input that contains
** AND, OR, and symbols like #.
**
** If nothing but spaces remains (which includes an empty input), the
** two-character string "" is returned instead.
**
** The caller is responsible for passing the result to fossil_free().
*/
char *search_simplify_pattern(const char * zPattern){
  char *zCopy = mprintf("%s",zPattern);
  int n = 0;       /* Bytes processed so far; length of zCopy at the end */
  int iLast;       /* Index of the last non-space byte, or -1 if none */

  while( zCopy[n] ){
    char c = zCopy[n];
    if( (c&0x80)==0 && !fossil_isalnum(c) ){
      c = ' ';
    }
    if( fossil_isupper(c) ){
      c = fossil_tolower(c);
    }
    zCopy[n++] = c;
  }
  iLast = n-1;
  while( iLast>=0 && zCopy[iLast]==' ' ){
    iLast--;
  }
  if( iLast<0 ){
    fossil_free(zCopy);
    zCopy = mprintf("\"\"");
  }
  return zCopy;
}
|
1095
|
|
|
1096
|
/* |
|
1097
|
** When this routine is called, there already exists a table |
|
1098
|
** |
|
1099
|
** x(label,url,score,id,snip). |
|
1100
|
** |
|
1101
|
** label: The "name" of the document containing the match |
|
1102
|
** url: A URL for the document |
|
1103
|
** score: How well the document matched |
|
1104
|
** id: The document id. Format: xNNNNN, x: type, N: number |
|
1105
|
** snip: A snippet for the match |
|
1106
|
** |
|
1107
|
** And the srchFlags parameter has been validated. This routine |
|
1108
|
** fills the X table with search results using FTS indexed search. |
|
1109
|
** |
|
1110
|
** The companion full-scan search routine is search_fullscan(). |
|
1111
|
*/ |
|
1112
|
LOCAL void search_indexed( |
|
1113
|
const char *zPattern, /* The query pattern */ |
|
1114
|
unsigned int srchFlags /* What to search over */ |
|
1115
|
){ |
|
1116
|
Blob sql; |
|
1117
|
char *zPat; |
|
1118
|
static const char *zSnippetCall; |
|
1119
|
if( srchFlags==0 ) return; |
|
1120
|
sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8|SQLITE_INNOCUOUS, 0, |
|
1121
|
search_rank_sqlfunc, 0, 0); |
|
1122
|
zPat = search_simplify_pattern(zPattern); |
|
1123
|
blob_init(&sql, 0, 0); |
|
1124
|
if( search_index_type(0)==4 ){ |
|
1125
|
/* If this repo is still using the legacy FTS5 search index, then |
|
1126
|
** the snippet() function is slightly different */ |
|
1127
|
zSnippetCall = "snippet(ftsidx,'<mark>','</mark>',' ... ',-1,35)"; |
|
1128
|
}else{ |
|
1129
|
/* This is the common case - Using newer FTS5 search index */ |
|
1130
|
zSnippetCall = "snippet(ftsidx,-1,'<mark>','</mark>',' ... ',35)"; |
|
1131
|
} |
|
1132
|
blob_appendf(&sql, |
|
1133
|
"INSERT INTO x(label,url,score,id,date,snip) " |
|
1134
|
" SELECT ftsdocs.label," |
|
1135
|
" ftsdocs.url," |
|
1136
|
" rank(matchinfo(ftsidx,'pcsx'))," |
|
1137
|
" ftsdocs.type || ftsdocs.rid," |
|
1138
|
" datetime(ftsdocs.mtime)," |
|
1139
|
" %s" |
|
1140
|
" FROM ftsidx CROSS JOIN ftsdocs" |
|
1141
|
" WHERE ftsidx MATCH %Q" |
|
1142
|
" AND ftsdocs.rowid=ftsidx.rowid", |
|
1143
|
zSnippetCall /*safe-for-%s*/, zPat |
|
1144
|
); |
|
1145
|
fossil_free(zPat); |
|
1146
|
if( srchFlags!=SRCH_ALL ){ |
|
1147
|
const char *zSep = " AND ("; |
|
1148
|
static const struct { unsigned m; char c; } aMask[] = { |
|
1149
|
{ SRCH_CKIN, 'c' }, |
|
1150
|
{ SRCH_DOC, 'd' }, |
|
1151
|
{ SRCH_TKT, 't' }, |
|
1152
|
{ SRCH_WIKI, 'w' }, |
|
1153
|
{ SRCH_TECHNOTE, 'e' }, |
|
1154
|
{ SRCH_FORUM, 'f' }, |
|
1155
|
{ SRCH_HELP, 'h' }, |
|
1156
|
}; |
|
1157
|
int i; |
|
1158
|
for(i=0; i<count(aMask); i++){ |
|
1159
|
if( srchFlags & aMask[i].m ){ |
|
1160
|
blob_appendf(&sql, "%sftsdocs.type='%c'", zSep, aMask[i].c); |
|
1161
|
zSep = " OR "; |
|
1162
|
} |
|
1163
|
} |
|
1164
|
blob_append(&sql,")",1); |
|
1165
|
} |
|
1166
|
db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/); |
|
1167
|
#if SEARCH_DEBUG_RANK |
|
1168
|
db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)"); |
|
1169
|
#endif |
|
1170
|
} |
|
1171
|
|
|
1172
|
/*
** If z[] begins with "<mark>TEXT</mark>", where TEXT is a (possibly
** empty) run of characters accepted by fossil_isalnum(), return the
** total length in bytes of that marked span including both tags.
** Otherwise return 0.
*/
static int isSnippetMark(const char *z){
  int n = 6;                 /* Offset just past the opening "<mark>" */
  if( strncmp(z,"<mark>",6)!=0 ) return 0;
  while( fossil_isalnum(z[n]) ){
    n++;
  }
  return strncmp(&z[n],"</mark>",7)==0 ? n+7 : 0;
}
|
1184
|
|
|
1185
|
/*
** Return a copy of zSnip (in memory obtained from fossil_malloc()) that
** has all "<" characters, other than those that begin a span recognized
** by isSnippetMark(), converted into "&lt;".  This is similar to
** htmlize() except that the <mark>...</mark> spans are preserved.
**
** A NULL zSnip is treated as an empty string.  Only "<" is escaped;
** all other bytes are copied through unchanged.
*/
static char *cleanSnippet(const char *zSnip){
  int i;          /* Input length first, then the write offset into z[] */
  int n = 0;      /* Count of '<' first, then length of a marked span */
  char *z;        /* The result */
  if( zSnip==0 ) zSnip = "";
  for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++;
  /* Each escaped '<' grows from 1 byte to 4, so this is always enough */
  z = fossil_malloc( i+n*4+1 );
  i = 0;
  while( zSnip[0] ){
    if( zSnip[0]=='<' ){
      n = isSnippetMark(zSnip);
      if( n ){
        /* Copy the whole "<mark>TEXT</mark>" span through verbatim */
        memcpy(&z[i], zSnip, n);
        zSnip += n;
        i += n;
        continue;
      }else{
        /* The literal must be the 4-byte entity.  Copying 4 bytes out of
        ** the 2-byte literal "<" read past its end and emitted a raw '<'
        ** followed by a NUL. */
        memcpy(&z[i], "&lt;", 4);
        i += 4;
        zSnip++;
      }
    }else{
      z[i++] = zSnip[0];
      zSnip++;
    }
  }
  z[i] = 0;
  return z;
}
|
1220
|
|
|
1221
|
|
|
1222
|
/* |
|
1223
|
** This routine generates web-page output for a search operation. |
|
1224
|
** Other web-pages can invoke this routine to add search results |
|
1225
|
** in the middle of the page. |
|
1226
|
** |
|
1227
|
** This routine works for both full-scan and indexed search. The |
|
1228
|
** appropriate low-level search routine is called according to the |
|
1229
|
** current configuration. |
|
1230
|
** |
|
1231
|
** Return the number of rows. |
|
1232
|
*/ |
|
1233
|
int search_run_and_output( |
|
1234
|
const char *zPattern, /* The query pattern */ |
|
1235
|
unsigned int srchFlags, /* What to search over */ |
|
1236
|
int fDebug /* Extra debugging output */ |
|
1237
|
){ |
|
1238
|
Stmt q; |
|
1239
|
int nRow = 0; |
|
1240
|
int nLimit = db_get_int("search-limit", 100); |
|
1241
|
|
|
1242
|
if( P("searchlimit")!=0 ){ |
|
1243
|
nLimit = atoi(P("searchlimit")); |
|
1244
|
} |
|
1245
|
srchFlags = search_restrict(srchFlags) | (srchFlags & SRCH_HELP); |
|
1246
|
if( srchFlags==0 ) return 0; |
|
1247
|
search_sql_setup(g.db); |
|
1248
|
add_content_sql_commands(g.db); |
|
1249
|
db_multi_exec( |
|
1250
|
"CREATE TEMP TABLE x(label,url,score,id,date,snip);" |
|
1251
|
); |
|
1252
|
if( !search_index_exists() ){ |
|
1253
|
search_fullscan(zPattern, srchFlags); /* Full-scan search */ |
|
1254
|
}else{ |
|
1255
|
search_update_index(srchFlags); /* Update the index, if necessary */ |
|
1256
|
search_indexed(zPattern, srchFlags); /* Indexed search */ |
|
1257
|
if( srchFlags & SRCH_HELP ){ |
|
1258
|
search_fullscan(zPattern, SRCH_HELP); |
|
1259
|
} |
|
1260
|
} |
|
1261
|
db_prepare(&q, "SELECT url, snip, label, score, id, substr(date,1,10)" |
|
1262
|
" FROM x" |
|
1263
|
" ORDER BY score DESC, date DESC;"); |
|
1264
|
while( db_step(&q)==SQLITE_ROW ){ |
|
1265
|
const char *zUrl = db_column_text(&q, 0); |
|
1266
|
const char *zSnippet = db_column_text(&q, 1); |
|
1267
|
const char *zLabel = db_column_text(&q, 2); |
|
1268
|
const char *zDate = db_column_text(&q, 5); |
|
1269
|
if( nRow==0 ){ |
|
1270
|
@ <ol> |
|
1271
|
} |
|
1272
|
nRow++; |
|
1273
|
@ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a> |
|
1274
|
if( fDebug ){ |
|
1275
|
@ (%e(db_column_double(&q,3)), %s(db_column_text(&q,4)) |
|
1276
|
} |
|
1277
|
@ <br><span class='snippet'>%z(cleanSnippet(zSnippet)) \ |
|
1278
|
if( zLabel && zDate && zDate[0] && strstr(zLabel,zDate)==0 ){ |
|
1279
|
@ <small>(%h(zDate))</small> |
|
1280
|
} |
|
1281
|
@ </span></li> |
|
1282
|
if( nLimit && nRow>=nLimit ) break; |
|
1283
|
} |
|
1284
|
db_finalize(&q); |
|
1285
|
if( nRow ){ |
|
1286
|
@ </ol> |
|
1287
|
} |
|
1288
|
return nRow; |
|
1289
|
} |
|
1290
|
|
|
1291
|
/* |
|
1292
|
** Generate some HTML for doing search. At a minimum include the |
|
1293
|
** Search-Text entry form. If the "s" query parameter is present, also |
|
1294
|
** show search results. |
|
1295
|
** |
|
1296
|
** The srchFlags parameter restricts the set of documents to be searched. |
|
1297
|
** srchFlags should normally be either a single search category or all |
|
1298
|
** categories. Any srchFlags with two or more bits set |
|
1299
|
** is treated like SRCH_ALL for display purposes. |
|
1300
|
** |
|
1301
|
** This routine automatically restricts srchFlag according to user |
|
1302
|
** permissions and the server configuration. The entry box is shown |
|
1303
|
** disabled if srchFlags is 0 after these restrictions are applied. |
|
1304
|
** |
|
1305
|
** The mFlags value controls options: |
|
1306
|
** |
|
1307
|
** 0x01 If the y= query parameter is present, use it as an addition |
|
1308
|
** restriction what to search. |
|
1309
|
** |
|
1310
|
** 0x02 Show nothing if search is disabled. |
|
1311
|
** |
|
1312
|
** Return true if there are search results. |
|
1313
|
*/ |
|
1314
|
int search_screen(unsigned srchAllowed, int mFlags){ |
|
1315
|
const char *zType = 0; |
|
1316
|
const char *zClass = 0; |
|
1317
|
const char *zDisable1; |
|
1318
|
const char *zDisable2; |
|
1319
|
const char *zPattern; |
|
1320
|
int fDebug = PB("debug"); |
|
1321
|
int haveResult = 0; |
|
1322
|
int srchThisTime; |
|
1323
|
const char *zY = PD("y","all"); |
|
1324
|
if( zY[0]=='h' && zY[1]==0 ){ |
|
1325
|
srchAllowed = search_restrict(srchAllowed) | (srchAllowed & SRCH_HELP); |
|
1326
|
}else{ |
|
1327
|
srchAllowed = search_restrict(srchAllowed); |
|
1328
|
} |
|
1329
|
switch( srchAllowed ){ |
|
1330
|
case SRCH_CKIN: zType = " Check-ins"; zClass = "Ckin"; break; |
|
1331
|
case SRCH_DOC: zType = " Docs"; zClass = "Doc"; break; |
|
1332
|
case SRCH_TKT: zType = " Tickets"; zClass = "Tkt"; break; |
|
1333
|
case SRCH_WIKI: zType = " Wiki"; zClass = "Wiki"; break; |
|
1334
|
case SRCH_TECHNOTE: zType = " Tech Notes"; zClass = "Note"; break; |
|
1335
|
case SRCH_FORUM: zType = " Forum"; zClass = "Frm"; break; |
|
1336
|
case SRCH_HELP: zType = " Help"; zClass = "Hlp"; break; |
|
1337
|
} |
|
1338
|
if( srchAllowed==0 ){ |
|
1339
|
if( mFlags & 0x02 ) return 0; |
|
1340
|
zDisable1 = " disabled"; |
|
1341
|
zDisable2 = " disabled"; |
|
1342
|
zPattern = ""; |
|
1343
|
}else{ |
|
1344
|
zDisable1 = ""; /* Was: " autofocus" */ |
|
1345
|
zDisable2 = ""; |
|
1346
|
zPattern = PD("s",""); |
|
1347
|
} |
|
1348
|
@ <form method='GET' action='%R/%T(g.zPath)'> |
|
1349
|
if( zClass ){ |
|
1350
|
@ <div class='searchForm searchForm%s(zClass)'> |
|
1351
|
}else{ |
|
1352
|
@ <div class='searchForm'> |
|
1353
|
} |
|
1354
|
@ <input type="text" name="s" size="40" value="%h(zPattern)"%s(zDisable1)> |
|
1355
|
srchThisTime = srchAllowed; |
|
1356
|
if( (mFlags & 0x01)!=0 && (srchAllowed & (srchAllowed-1))!=0 ){ |
|
1357
|
static const struct { |
|
1358
|
const char *z; |
|
1359
|
const char *zNm; |
|
1360
|
unsigned m; |
|
1361
|
} aY[] = { |
|
1362
|
{ "all", "All", SRCH_ALL }, |
|
1363
|
{ "c", "Check-ins", SRCH_CKIN }, |
|
1364
|
{ "d", "Docs", SRCH_DOC }, |
|
1365
|
{ "t", "Tickets", SRCH_TKT }, |
|
1366
|
{ "w", "Wiki", SRCH_WIKI }, |
|
1367
|
{ "e", "Tech Notes", SRCH_TECHNOTE }, |
|
1368
|
{ "f", "Forum", SRCH_FORUM }, |
|
1369
|
{ "h", "Help", SRCH_HELP }, |
|
1370
|
}; |
|
1371
|
int i; |
|
1372
|
@ <select size='1' name='y'> |
|
1373
|
for(i=0; i<count(aY); i++){ |
|
1374
|
if( (aY[i].m & srchAllowed)==0 ) continue; |
|
1375
|
if( aY[i].m==SRCH_HELP && fossil_strcmp(zY,"h")!=0 |
|
1376
|
&& search_restrict(SRCH_HELP)==0 ) continue; |
|
1377
|
cgi_printf("<option value='%s'", aY[i].z); |
|
1378
|
if( fossil_strcmp(zY,aY[i].z)==0 ){ |
|
1379
|
srchThisTime &= aY[i].m; |
|
1380
|
cgi_printf(" selected"); |
|
1381
|
} |
|
1382
|
cgi_printf(">%s</option>\n", aY[i].zNm); |
|
1383
|
} |
|
1384
|
@ </select> |
|
1385
|
} |
|
1386
|
if( fDebug ){ |
|
1387
|
@ <input type="hidden" name="debug" value="1"> |
|
1388
|
} |
|
1389
|
@ <input type="submit" value="Search%s(zType)"%s(zDisable2)> |
|
1390
|
if( srchAllowed==0 && srchThisTime==0 ){ |
|
1391
|
@ <p class="generalError">Search is disabled</p> |
|
1392
|
} |
|
1393
|
@ </div></form> |
|
1394
|
while( fossil_isspace(zPattern[0]) ) zPattern++; |
|
1395
|
if( zPattern[0] ){ |
|
1396
|
if( zClass ){ |
|
1397
|
@ <div class='searchResult searchResult%s(zClass)'> |
|
1398
|
}else{ |
|
1399
|
@ <div class='searchResult'> |
|
1400
|
} |
|
1401
|
if( search_run_and_output(zPattern, srchThisTime, fDebug)==0 ){ |
|
1402
|
@ <p class='searchEmpty'>No matches for: <span>%h(zPattern)</span></p> |
|
1403
|
} |
|
1404
|
@ </div> |
|
1405
|
haveResult = 1; |
|
1406
|
} |
|
1407
|
return haveResult; |
|
1408
|
} |
|
1409
|
|
|
1410
|
/* |
|
1411
|
** WEBPAGE: search |
|
1412
|
** |
|
1413
|
** Search for check-in comments, documents, tickets, or wiki that |
|
1414
|
** match a user-supplied pattern. |
|
1415
|
** |
|
1416
|
** s=PATTERN Specify the full-text pattern to search for |
|
1417
|
** y=TYPE What to search. |
|
1418
|
** c -> check-ins, |
|
1419
|
** d -> documentation, |
|
1420
|
** t -> tickets, |
|
1421
|
** w -> wiki, |
|
1422
|
** e -> tech notes, |
|
1423
|
** f -> forum, |
|
1424
|
** h -> built-in help, |
|
1425
|
** all -> everything. |
|
1426
|
*/ |
|
1427
|
void search_page(void){ |
|
1428
|
const int isSearch = P("s")!=0; |
|
1429
|
login_check_credentials(); |
|
1430
|
style_header("Search%s", isSearch ? " Results" : ""); |
|
1431
|
cgi_check_for_malice(); |
|
1432
|
search_screen(SRCH_ALL, 1); |
|
1433
|
style_finish_page(); |
|
1434
|
} |
|
1435
|
|
|
1436
|
|
|
1437
|
/* |
|
1438
|
** This is a helper function for search_stext(). Writing into pOut |
|
1439
|
** the search text obtained from pIn according to zMimetype. |
|
1440
|
** |
|
1441
|
** If a title is not specified in zTitle (e.g. for wiki pages that do not |
|
1442
|
** include the title in the body), it is determined from the page content. |
|
1443
|
** |
|
1444
|
** The title of the document is the first line of text. All subsequent |
|
1445
|
** lines are the body. If the document has no title, the first line |
|
1446
|
** is blank. |
|
1447
|
*/ |
|
1448
|
static void get_stext_by_mimetype( |
|
1449
|
Blob *pIn, |
|
1450
|
const char *zMimetype, |
|
1451
|
const char *zTitle, |
|
1452
|
Blob *pOut |
|
1453
|
){ |
|
1454
|
Blob html, title; |
|
1455
|
Blob *pHtml = &html; |
|
1456
|
blob_init(&html, 0, 0); |
|
1457
|
if( zTitle==0 ){ |
|
1458
|
blob_init(&title, 0, 0); |
|
1459
|
}else{ |
|
1460
|
blob_init(&title, zTitle, -1); |
|
1461
|
} |
|
1462
|
if( zMimetype==0 ) zMimetype = "text/plain"; |
|
1463
|
if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){ |
|
1464
|
if( blob_size(&title) ){ |
|
1465
|
wiki_convert(pIn, &html, 0); |
|
1466
|
}else{ |
|
1467
|
Blob tail; |
|
1468
|
blob_init(&tail, 0, 0); |
|
1469
|
if( wiki_find_title(pIn, &title, &tail) ){ |
|
1470
|
blob_appendf(pOut, "%s\n", blob_str(&title)); |
|
1471
|
wiki_convert(&tail, &html, 0); |
|
1472
|
blob_reset(&tail); |
|
1473
|
}else{ |
|
1474
|
blob_append(pOut, "\n", 1); |
|
1475
|
wiki_convert(pIn, &html, 0); |
|
1476
|
} |
|
1477
|
} |
|
1478
|
html_to_plaintext(blob_str(&html), pOut, 0); |
|
1479
|
}else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){ |
|
1480
|
markdown_to_html(pIn, blob_size(&title) ? NULL : &title, &html); |
|
1481
|
}else if( fossil_strcmp(zMimetype,"text/html")==0 ){ |
|
1482
|
if( blob_size(&title)==0 ) doc_is_embedded_html(pIn, &title); |
|
1483
|
pHtml = pIn; |
|
1484
|
} |
|
1485
|
blob_appendf(pOut, "%s\n", blob_str(&title)); |
|
1486
|
if( blob_size(pHtml) ){ |
|
1487
|
html_to_plaintext(blob_str(pHtml), pOut, 0); |
|
1488
|
}else{ |
|
1489
|
blob_append(pOut, blob_buffer(pIn), blob_size(pIn)); |
|
1490
|
} |
|
1491
|
blob_reset(&html); |
|
1492
|
blob_reset(&title); |
|
1493
|
} |
|
1494
|
|
|
1495
|
/* |
|
1496
|
** Query pQuery is pointing at a single row of output. Append a text |
|
1497
|
** representation of every text-compatible column to pAccum. |
|
1498
|
*/ |
|
1499
|
static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery, int iTitle){ |
|
1500
|
int n = db_column_count(pQuery); |
|
1501
|
int i; |
|
1502
|
const char *zMime = 0; |
|
1503
|
if( iTitle>=0 && iTitle<n ){ |
|
1504
|
if( db_column_type(pQuery,iTitle)==SQLITE_TEXT ){ |
|
1505
|
blob_append(pAccum, db_column_text(pQuery,iTitle), -1); |
|
1506
|
} |
|
1507
|
blob_append(pAccum, "\n", 1); |
|
1508
|
} |
|
1509
|
for(i=0; i<n; i++){ |
|
1510
|
const char *zColName = db_column_name(pQuery,i); |
|
1511
|
int eType = db_column_type(pQuery,i); |
|
1512
|
if( i==iTitle ) continue; |
|
1513
|
if( fossil_strnicmp(zColName,"tkt_",4)==0 ) continue; |
|
1514
|
if( fossil_strnicmp(zColName,"private_",8)==0 ) continue; |
|
1515
|
if( eType==SQLITE_BLOB || eType==SQLITE_NULL ) continue; |
|
1516
|
if( fossil_stricmp(zColName,"mimetype")==0 ){ |
|
1517
|
zMime = db_column_text(pQuery,i); |
|
1518
|
if( fossil_strcmp(zMime,"text/plain")==0 ) zMime = 0; |
|
1519
|
}else if( zMime==0 || eType!=SQLITE_TEXT ){ |
|
1520
|
blob_appendf(pAccum, "%s: %s |\n", zColName, db_column_text(pQuery,i)); |
|
1521
|
}else{ |
|
1522
|
Blob txt; |
|
1523
|
blob_init(&txt, db_column_text(pQuery,i), -1); |
|
1524
|
blob_appendf(pAccum, "%s: ", zColName); |
|
1525
|
get_stext_by_mimetype(&txt, zMime, NULL, pAccum); |
|
1526
|
blob_append(pAccum, " |", 2); |
|
1527
|
blob_reset(&txt); |
|
1528
|
} |
|
1529
|
} |
|
1530
|
} |
|
1531
|
|
|
1532
|
|
|
1533
|
/* |
|
1534
|
** Return "search text" - a reduced version of a document appropriate for |
|
1535
|
** full text search and/or for constructing a search result snippet. |
|
1536
|
** |
|
1537
|
** cType: d Embedded documentation |
|
1538
|
** w Wiki page |
|
1539
|
** c Check-in comment |
|
1540
|
** t Ticket text |
|
1541
|
** e Tech note |
|
1542
|
** f Forum |
|
1543
|
** |
|
1544
|
** rid The RID of an artifact that defines the object |
|
1545
|
** being searched. |
|
1546
|
** |
|
1547
|
** zName Name of the object being searched. This is used |
|
1548
|
** only to help figure out the mimetype (text/plain, |
|
1549
|
** test/html, test/x-fossil-wiki, or text/x-markdown) |
|
1550
|
** so that the code can know how to simplify the text. |
|
1551
|
*/ |
|
1552
|
void search_stext( |
|
1553
|
char cType, /* Type of document */ |
|
1554
|
int rid, /* BLOB.RID or TAG.TAGID value for document */ |
|
1555
|
const char *zName, /* Auxiliary information */ |
|
1556
|
Blob *pOut /* OUT: Initialize to the search text */ |
|
1557
|
){ |
|
1558
|
blob_init(pOut, 0, 0); |
|
1559
|
switch( cType ){ |
|
1560
|
case 'd': { /* Documents */ |
|
1561
|
Blob doc; |
|
1562
|
content_get(rid, &doc); |
|
1563
|
blob_to_utf8_no_bom(&doc, 0); |
|
1564
|
get_stext_by_mimetype(&doc, mimetype_from_name(zName), NULL, pOut); |
|
1565
|
blob_reset(&doc); |
|
1566
|
break; |
|
1567
|
} |
|
1568
|
case 'f': /* Forum messages */ |
|
1569
|
case 'e': /* Tech Notes */ |
|
1570
|
case 'w': { /* Wiki */ |
|
1571
|
Manifest *pWiki = manifest_get(rid, |
|
1572
|
cType == 'e' ? CFTYPE_EVENT : |
|
1573
|
cType == 'f' ? CFTYPE_FORUM : CFTYPE_WIKI, 0); |
|
1574
|
Blob wiki; |
|
1575
|
if( pWiki==0 ) break; |
|
1576
|
if( cType=='f' ){ |
|
1577
|
blob_init(&wiki, 0, 0); |
|
1578
|
if( pWiki->zThreadTitle ){ |
|
1579
|
blob_appendf(&wiki, "<h1>%h</h1>\n", pWiki->zThreadTitle); |
|
1580
|
} |
|
1581
|
blob_appendf(&wiki, "From %s:\n\n%s", pWiki->zUser, pWiki->zWiki); |
|
1582
|
}else{ |
|
1583
|
blob_init(&wiki, pWiki->zWiki, -1); |
|
1584
|
} |
|
1585
|
get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype), |
|
1586
|
cType=='w' ? pWiki->zWikiTitle : NULL, pOut); |
|
1587
|
blob_reset(&wiki); |
|
1588
|
manifest_destroy(pWiki); |
|
1589
|
break; |
|
1590
|
} |
|
1591
|
case 'c': { /* Check-in Comments */ |
|
1592
|
static Stmt q; |
|
1593
|
static int isPlainText = -1; |
|
1594
|
db_static_prepare(&q, |
|
1595
|
"SELECT coalesce(ecomment,comment)" |
|
1596
|
" ||' (user: '||coalesce(euser,user,'?')" |
|
1597
|
" ||', tags: '||" |
|
1598
|
" (SELECT group_concat(substr(tag.tagname,5),',')" |
|
1599
|
" FROM tag, tagxref" |
|
1600
|
" WHERE tagname GLOB 'sym-*' AND tag.tagid=tagxref.tagid" |
|
1601
|
" AND tagxref.rid=event.objid AND tagxref.tagtype>0)" |
|
1602
|
" ||')'" |
|
1603
|
" FROM event WHERE objid=:x AND type='ci'"); |
|
1604
|
if( isPlainText<0 ){ |
|
1605
|
isPlainText = db_get_boolean("timeline-plaintext",0); |
|
1606
|
} |
|
1607
|
db_bind_int(&q, ":x", rid); |
|
1608
|
if( db_step(&q)==SQLITE_ROW ){ |
|
1609
|
blob_append(pOut, "\n", 1); |
|
1610
|
if( isPlainText ){ |
|
1611
|
db_column_blob(&q, 0, pOut); |
|
1612
|
}else{ |
|
1613
|
Blob x; |
|
1614
|
blob_init(&x,0,0); |
|
1615
|
db_column_blob(&q, 0, &x); |
|
1616
|
get_stext_by_mimetype(&x, "text/x-fossil-wiki", NULL, pOut); |
|
1617
|
blob_reset(&x); |
|
1618
|
} |
|
1619
|
} |
|
1620
|
db_reset(&q); |
|
1621
|
break; |
|
1622
|
} |
|
1623
|
case 't': { /* Tickets */ |
|
1624
|
static Stmt q1; |
|
1625
|
static int iTitle = -1; |
|
1626
|
db_static_prepare(&q1, "SELECT * FROM ticket WHERE tkt_id=:rid"); |
|
1627
|
db_bind_int(&q1, ":rid", rid); |
|
1628
|
if( db_step(&q1)==SQLITE_ROW ){ |
|
1629
|
if( iTitle<0 ){ |
|
1630
|
int n = db_column_count(&q1); |
|
1631
|
for(iTitle=0; iTitle<n; iTitle++){ |
|
1632
|
if( fossil_stricmp(db_column_name(&q1,iTitle),"title")==0 ) break; |
|
1633
|
} |
|
1634
|
} |
|
1635
|
append_all_ticket_fields(pOut, &q1, iTitle); |
|
1636
|
} |
|
1637
|
db_reset(&q1); |
|
1638
|
if( db_table_exists("repository","ticketchng") ){ |
|
1639
|
static Stmt q2; |
|
1640
|
db_static_prepare(&q2, "SELECT * FROM ticketchng WHERE tkt_id=:rid" |
|
1641
|
" ORDER BY tkt_mtime"); |
|
1642
|
db_bind_int(&q2, ":rid", rid); |
|
1643
|
while( db_step(&q2)==SQLITE_ROW ){ |
|
1644
|
append_all_ticket_fields(pOut, &q2, -1); |
|
1645
|
} |
|
1646
|
db_reset(&q2); |
|
1647
|
} |
|
1648
|
break; |
|
1649
|
} |
|
1650
|
} |
|
1651
|
} |
|
1652
|
|
|
1653
|
/* |
|
1654
|
** This routine is a wrapper around search_stext(). |
|
1655
|
** |
|
1656
|
** This routine looks up the search text, stores it in an internal |
|
1657
|
** buffer, and returns a pointer to the text. Subsequent requests |
|
1658
|
** for the same document return the same pointer. The returned pointer |
|
1659
|
** is valid until the next invocation of this routine. Call this routine |
|
1660
|
** with an eType of 0 to clear the cache. |
|
1661
|
*/ |
|
1662
|
char *search_stext_cached( |
|
1663
|
char cType, /* Type of document */ |
|
1664
|
int rid, /* BLOB.RID or TAG.TAGID value for document */ |
|
1665
|
const char *zName, /* Auxiliary information, for mimetype */ |
|
1666
|
int *pnTitle /* OUT: length of title in bytes excluding \n */ |
|
1667
|
){ |
|
1668
|
static struct { |
|
1669
|
Blob stext; /* Cached search text */ |
|
1670
|
char cType; /* The type */ |
|
1671
|
int rid; /* The RID */ |
|
1672
|
int nTitle; /* Number of bytes in title */ |
|
1673
|
} cache; |
|
1674
|
int i; |
|
1675
|
char *z; |
|
1676
|
if( cType!=cache.cType || rid!=cache.rid ){ |
|
1677
|
if( cache.rid>0 ){ |
|
1678
|
blob_reset(&cache.stext); |
|
1679
|
}else{ |
|
1680
|
blob_init(&cache.stext,0,0); |
|
1681
|
} |
|
1682
|
cache.cType = cType; |
|
1683
|
cache.rid = rid; |
|
1684
|
if( cType==0 ) return 0; |
|
1685
|
search_stext(cType, rid, zName, &cache.stext); |
|
1686
|
z = blob_str(&cache.stext); |
|
1687
|
for(i=0; z[i] && z[i]!='\n'; i++){} |
|
1688
|
cache.nTitle = i; |
|
1689
|
} |
|
1690
|
if( pnTitle ) *pnTitle = cache.nTitle; |
|
1691
|
return blob_str(&cache.stext); |
|
1692
|
} |
|
1693
|
|
|
1694
|
/* |
|
1695
|
** COMMAND: test-search-stext |
|
1696
|
** |
|
1697
|
** Usage: fossil test-search-stext TYPE RID NAME |
|
1698
|
** |
|
1699
|
** Compute the search text for document TYPE-RID whose name is NAME. |
|
1700
|
** The TYPE is one of "c", "d", "t", "w", or "e". The RID is the document |
|
1701
|
** ID. The NAME is used to figure out a mimetype to use for formatting |
|
1702
|
** the raw document text. |
|
1703
|
*/ |
|
1704
|
void test_search_stext(void){ |
|
1705
|
Blob out; |
|
1706
|
db_find_and_open_repository(0,0); |
|
1707
|
if( g.argc!=5 ) usage("TYPE RID NAME"); |
|
1708
|
search_stext(g.argv[2][0], atoi(g.argv[3]), g.argv[4], &out); |
|
1709
|
fossil_print("%s\n",blob_str(&out)); |
|
1710
|
blob_reset(&out); |
|
1711
|
} |
|
1712
|
|
|
1713
|
/* |
|
1714
|
** COMMAND: test-convert-stext |
|
1715
|
** |
|
1716
|
** Usage: fossil test-convert-stext FILE MIMETYPE |
|
1717
|
** |
|
1718
|
** Read the content of FILE and convert it to stext according to MIMETYPE. |
|
1719
|
** Send the result to standard output. |
|
1720
|
*/ |
|
1721
|
void test_convert_stext(void){ |
|
1722
|
Blob in, out; |
|
1723
|
db_find_and_open_repository(0,0); |
|
1724
|
if( g.argc!=4 ) usage("FILENAME MIMETYPE"); |
|
1725
|
blob_read_from_file(&in, g.argv[2], ExtFILE); |
|
1726
|
blob_init(&out, 0, 0); |
|
1727
|
get_stext_by_mimetype(&in, g.argv[3], NULL, &out); |
|
1728
|
fossil_print("%s\n",blob_str(&out)); |
|
1729
|
blob_reset(&in); |
|
1730
|
blob_reset(&out); |
|
1731
|
} |
|
1732
|
|
|
1733
|
/* |
|
1734
|
** The schema for the full-text index. The %s part must be an empty |
|
1735
|
** string or a comma followed by additional flags for the FTS virtual |
|
1736
|
** table. |
|
1737
|
*/ |
|
1738
|
static const char zFtsSchema[] = |
|
1739
|
@ -- One entry for each possible search result |
|
1740
|
@ CREATE TABLE IF NOT EXISTS repository.ftsdocs( |
|
1741
|
@ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.rowid |
|
1742
|
@ type CHAR(1), -- Type of document |
|
1743
|
@ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document |
|
1744
|
@ name TEXT, -- Additional document description |
|
1745
|
@ idxed BOOLEAN, -- True if currently in the index |
|
1746
|
@ label TEXT, -- Label to print on search results |
|
1747
|
@ url TEXT, -- URL to access this document |
|
1748
|
@ mtime DATE, -- Date when document created |
|
1749
|
@ bx TEXT, -- Temporary "body" content cache |
|
1750
|
@ UNIQUE(type,rid) |
|
1751
|
@ ); |
|
1752
|
@ CREATE INDEX repository.ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; |
|
1753
|
@ CREATE INDEX repository.ftsdocName ON ftsdocs(name) WHERE type='w'; |
|
1754
|
@ CREATE VIEW IF NOT EXISTS repository.ftscontent AS |
|
1755
|
@ SELECT rowid, type, rid, name, idxed, label, url, mtime, |
|
1756
|
@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body' |
|
1757
|
@ FROM ftsdocs; |
|
1758
|
@ CREATE VIRTUAL TABLE IF NOT EXISTS repository.ftsidx |
|
1759
|
@ USING fts5(content="ftscontent", title, body%s); |
|
1760
|
; |
|
1761
|
static const char zFtsDrop[] = |
|
1762
|
@ DROP TABLE IF EXISTS repository.ftsidx; |
|
1763
|
@ DROP VIEW IF EXISTS repository.ftscontent; |
|
1764
|
@ DROP TABLE IF EXISTS repository.ftsdocs; |
|
1765
|
@ DROP TABLE IF EXISTS repository.chatfts1; |
|
1766
|
; |
|
1767
|
|
|
1768
|
#if INTERFACE
/*
** Tokenizer selections for the "search-tokenizer" config option.
*/
#define FTS5TOK_NONE      0   /* Tokenizer disabled */
#define FTS5TOK_PORTER    1   /* The porter stemmer */
#define FTS5TOK_UNICODE61 2   /* The unicode61 tokenizer */
#define FTS5TOK_TRIGRAM   3   /* The trigram tokenizer */
#endif
|
1777
|
|
|
1778
|
/* |
|
1779
|
** Cached FTS5TOK_xyz value for search_tokenizer_type() and |
|
1780
|
** friends. |
|
1781
|
*/ |
|
1782
|
static int iFtsTokenizer = -1; |
|
1783
|
|
|
1784
|
/* |
|
1785
|
** Returns one of the FTS5TOK_xyz values, depending on the value of |
|
1786
|
** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The |
|
1787
|
** result of the first call is cached for subsequent calls unless |
|
1788
|
** bRecheck is true. |
|
1789
|
*/ |
|
1790
|
int search_tokenizer_type(int bRecheck){ |
|
1791
|
char *z; |
|
1792
|
if( iFtsTokenizer>=0 && bRecheck==0 ){ |
|
1793
|
return iFtsTokenizer; |
|
1794
|
} |
|
1795
|
z = db_get("search-tokenizer",0); |
|
1796
|
if( 0==z ){ |
|
1797
|
iFtsTokenizer = FTS5TOK_NONE; |
|
1798
|
}else if(0==fossil_strcmp(z,"porter")){ |
|
1799
|
iFtsTokenizer = FTS5TOK_PORTER; |
|
1800
|
}else if(0==fossil_strcmp(z,"unicode61")){ |
|
1801
|
iFtsTokenizer = FTS5TOK_UNICODE61; |
|
1802
|
}else if(0==fossil_strcmp(z,"trigram")){ |
|
1803
|
iFtsTokenizer = FTS5TOK_TRIGRAM; |
|
1804
|
}else{ |
|
1805
|
iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; |
|
1806
|
} |
|
1807
|
fossil_free(z); |
|
1808
|
return iFtsTokenizer; |
|
1809
|
} |
|
1810
|
|
|
1811
|
/* |
|
1812
|
** Returns a string in the form ",tokenize=X", where X is the string |
|
1813
|
** counterpart of the given FTS5TOK_xyz value. Returns "" if tokType |
|
1814
|
** does not correspond to a known FTS5 tokenizer. |
|
1815
|
*/ |
|
1816
|
const char * search_tokenize_arg_for_type(int tokType){ |
|
1817
|
switch( tokType ){ |
|
1818
|
case FTS5TOK_PORTER: return ",tokenize=porter"; |
|
1819
|
case FTS5TOK_UNICODE61: return ",tokenize=unicode61"; |
|
1820
|
case FTS5TOK_TRIGRAM: return ",tokenize=trigram"; |
|
1821
|
case FTS5TOK_NONE: |
|
1822
|
default: return ""; |
|
1823
|
} |
|
1824
|
} |
|
1825
|
|
|
1826
|
/*
** Normalize z into a value suitable for the search-tokenizer setting.
**
** If z is NULL, the current value of the search-tokenizer setting is
** normalized instead.  The result is one of the string constants
** "porter", "unicode61", "trigram" or "off":
**
**    *  The three tokenizer names map to themselves.
**    *  Any other text maps to "porter" if is_truth() accepts it.
**    *  Everything else, including a missing setting, maps to "off".
*/
const char *search_tokenizer_for_string(const char *z){
  static const char *const azKnown[] = { "porter", "unicode61", "trigram" };
  char *zOwned = 0;        /* Setting text obtained from db_get(), if any */
  const char *zResult = 0; /* The normalized value */
  int i;

  if( z==0 ){
    zOwned = db_get("search-tokenizer",0);
    z = zOwned;
  }
  if( z==0 ){
    zResult = "off";
  }else{
    for(i=0; i<count(azKnown); i++){
      if( fossil_strcmp(z,azKnown[i])==0 ){
        zResult = azKnown[i];
        break;
      }
    }
    if( zResult==0 ){
      zResult = is_truth(z) ? "porter" : "off";
    }
  }
  fossil_free(zOwned);
  return zResult;
}
|
1855
|
|
|
1856
|
/* |
|
1857
|
** Sets the search-tokenizer config setting to the value of |
|
1858
|
** search_tokenizer_for_string(zName). |
|
1859
|
*/ |
|
1860
|
void search_set_tokenizer(const char *zName){ |
|
1861
|
db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0); |
|
1862
|
iFtsTokenizer = -1; |
|
1863
|
} |
|
1864
|
|
|
1865
|
/* |
|
1866
|
** Create or drop the tables associated with a full-text index. |
|
1867
|
*/ |
|
1868
|
static int searchIdxExists = -1; |
|
1869
|
void search_create_index(void){ |
|
1870
|
const char *zExtra = |
|
1871
|
search_tokenize_arg_for_type(search_tokenizer_type(0)); |
|
1872
|
assert( zExtra ); |
|
1873
|
search_sql_setup(g.db); |
|
1874
|
db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/); |
|
1875
|
searchIdxExists = 1; |
|
1876
|
} |
|
1877
|
void search_drop_index(void){ |
|
1878
|
db_multi_exec(zFtsDrop/*works-like:""*/); |
|
1879
|
searchIdxExists = 0; |
|
1880
|
} |
|
1881
|
|
|
1882
|
/* |
|
1883
|
** Return true if the full-text search index exists. See also the |
|
1884
|
** search_index_type() function. |
|
1885
|
*/ |
|
1886
|
int search_index_exists(void){ |
|
1887
|
if( searchIdxExists<0 ){ |
|
1888
|
searchIdxExists = db_table_exists("repository","ftsdocs"); |
|
1889
|
} |
|
1890
|
return searchIdxExists; |
|
1891
|
} |
|
1892
|
|
|
1893
|
/*
** Report which kind of full-text index the repository currently holds,
** judged from the SQL text of the "ftsidx" entry in the repository
** schema.  Return values:
**
**     0          No search index is available
**     4          FTS3/4
**     5          FTS5
**
** The result is cached.  Pass a non-zero bReset to discard the cached
** value and look again.  See also the search_index_exists() routine.
*/
int search_index_type(int bReset){
  static int idxType = -1;
  if( bReset==0 && idxType>=0 ) return idxType;
  idxType = db_int(0,
    "SELECT CASE WHEN sql GLOB '*fts4*' THEN 4 ELSE 5 END"
    " FROM repository.sqlite_schema WHERE name='ftsidx'"
  );
  return idxType;
}
|
1914
|
|
|
1915
|
/* |
|
1916
|
** Fill the FTSDOCS table with unindexed entries for everything |
|
1917
|
** in the repository. This uses INSERT OR IGNORE so entries already |
|
1918
|
** in FTSDOCS are unchanged. |
|
1919
|
*/ |
|
1920
|
void search_fill_index(void){ |
|
1921
|
if( !search_index_exists() ) return; |
|
1922
|
search_sql_setup(g.db); |
|
1923
|
db_multi_exec( |
|
1924
|
"INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)" |
|
1925
|
" SELECT 'c', objid, 0 FROM event WHERE type='ci';" |
|
1926
|
); |
|
1927
|
db_multi_exec( |
|
1928
|
"WITH latest_wiki(rid,name,mtime) AS (" |
|
1929
|
" SELECT tagxref.rid, substr(tag.tagname,6), max(tagxref.mtime)" |
|
1930
|
" FROM tag, tagxref" |
|
1931
|
" WHERE tag.tagname GLOB 'wiki-*'" |
|
1932
|
" AND tagxref.tagid=tag.tagid" |
|
1933
|
" AND tagxref.value>0" |
|
1934
|
" GROUP BY 2" |
|
1935
|
") INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed)" |
|
1936
|
" SELECT 'w', rid, name, 0 FROM latest_wiki;" |
|
1937
|
); |
|
1938
|
db_multi_exec( |
|
1939
|
"INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)" |
|
1940
|
" SELECT 't', tkt_id, 0 FROM ticket;" |
|
1941
|
); |
|
1942
|
db_multi_exec( |
|
1943
|
"INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed)" |
|
1944
|
" SELECT type, objid, comment, 0 FROM event WHERE type IN ('e','f');" |
|
1945
|
); |
|
1946
|
} |
|
1947
|
|
|
1948
|
/* |
|
1949
|
** The document described by cType,rid,zName is about to be added or |
|
1950
|
** updated. If the document has already been indexed, then unindex it |
|
1951
|
** now while we still have access to the old content. Add the document |
|
1952
|
** to the queue of documents that need to be indexed or reindexed. |
|
1953
|
*/ |
|
1954
|
void search_doc_touch(char cType, int rid, const char *zName){ |
|
1955
|
if( search_index_exists() && !content_is_private(rid) ){ |
|
1956
|
char zType[2]; |
|
1957
|
zType[0] = cType; |
|
1958
|
zType[1] = 0; |
|
1959
|
search_sql_setup(g.db); |
|
1960
|
db_multi_exec( |
|
1961
|
"DELETE FROM ftsidx WHERE rowid IN" |
|
1962
|
" (SELECT rowid FROM ftsdocs WHERE type=%Q AND rid=%d AND idxed)", |
|
1963
|
zType, rid |
|
1964
|
); |
|
1965
|
db_multi_exec( |
|
1966
|
"REPLACE INTO ftsdocs(type,rid,name,idxed)" |
|
1967
|
" VALUES(%Q,%d,%Q,0)", |
|
1968
|
zType, rid, zName |
|
1969
|
); |
|
1970
|
if( cType=='w' || cType=='e' ){ |
|
1971
|
db_multi_exec( |
|
1972
|
"DELETE FROM ftsidx WHERE rowid IN" |
|
1973
|
" (SELECT rowid FROM ftsdocs WHERE type='%c' AND name=%Q AND idxed)", |
|
1974
|
cType, zName |
|
1975
|
); |
|
1976
|
db_multi_exec( |
|
1977
|
"DELETE FROM ftsdocs WHERE type='%c' AND name=%Q AND rid!=%d", |
|
1978
|
cType, zName, rid |
|
1979
|
); |
|
1980
|
} |
|
1981
|
/* All forum posts are always indexed */ |
|
1982
|
} |
|
1983
|
} |
|
1984
|
|
|
1985
|
/* |
|
1986
|
** If the doc-glob and doc-br settings are valid for document search |
|
1987
|
** and if the latest check-in on doc-br is in the unindexed set of |
|
1988
|
** check-ins, then update all 'd' entries in FTSDOCS that have |
|
1989
|
** changed. |
|
1990
|
*/ |
|
1991
|
static void search_update_doc_index(void){ |
|
1992
|
const char *zMainBranch = db_main_branch(); |
|
1993
|
const char *zDocBranches = db_get("doc-branch", zMainBranch); |
|
1994
|
int i; |
|
1995
|
Glob * pGlob = glob_create(zDocBranches) |
|
1996
|
/* We're misusing a Glob as a list of comma-/space-delimited |
|
1997
|
** tokens. We're not actually doing glob matches here. */; |
|
1998
|
if( !pGlob ) return; |
|
1999
|
db_multi_exec( |
|
2000
|
"CREATE TEMP TABLE current_docs(rid INTEGER PRIMARY KEY, name);" |
|
2001
|
"CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;" |
|
2002
|
); |
|
2003
|
for( i = 0; i < pGlob->nPattern; ++i ){ |
|
2004
|
const char *zDocBr = pGlob->azPattern[i]; |
|
2005
|
int ckid = symbolic_name_to_rid(zDocBr,"ci"); |
|
2006
|
double rTime; |
|
2007
|
if( !db_exists("SELECT 1 FROM ftsdocs WHERE type='c' AND rid=%d" |
|
2008
|
" AND NOT idxed", ckid) ) continue; |
|
2009
|
/* If we get this far, it means that changes to 'd' entries are |
|
2010
|
** required. */ |
|
2011
|
rTime = db_double(0.0, "SELECT mtime FROM event WHERE objid=%d", ckid); |
|
2012
|
db_multi_exec( |
|
2013
|
"INSERT OR IGNORE INTO current_docs(rid, name)" |
|
2014
|
" SELECT blob.rid, foci.filename FROM foci, blob" |
|
2015
|
" WHERE foci.checkinID=%d AND blob.uuid=foci.uuid" |
|
2016
|
" AND %z", |
|
2017
|
ckid, glob_expr("foci.filename", db_get("doc-glob","")) |
|
2018
|
); |
|
2019
|
db_multi_exec( |
|
2020
|
"DELETE FROM ftsidx WHERE rowid IN" |
|
2021
|
" (SELECT rowid FROM ftsdocs WHERE type='d'" |
|
2022
|
" AND rid NOT IN (SELECT rid FROM current_docs))" |
|
2023
|
); |
|
2024
|
db_multi_exec( |
|
2025
|
"DELETE FROM ftsdocs WHERE type='d'" |
|
2026
|
" AND rid NOT IN (SELECT rid FROM current_docs)" |
|
2027
|
); |
|
2028
|
db_multi_exec( |
|
2029
|
"INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)" |
|
2030
|
" SELECT 'd', rid, name, 0," |
|
2031
|
" title('d',rid,name)," |
|
2032
|
" body('d',rid,name)," |
|
2033
|
" printf('/doc/%T/%%s',urlencode(name))," |
|
2034
|
" %.17g" |
|
2035
|
" FROM current_docs", |
|
2036
|
zDocBr, rTime |
|
2037
|
); |
|
2038
|
db_multi_exec( |
|
2039
|
"INSERT INTO ftsidx(rowid,title,body)" |
|
2040
|
" SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed" |
|
2041
|
); |
|
2042
|
db_multi_exec( |
|
2043
|
"UPDATE ftsdocs SET" |
|
2044
|
" idxed=1," |
|
2045
|
" bx=NULL," |
|
2046
|
" label='Document: '||label" |
|
2047
|
" WHERE type='d' AND NOT idxed" |
|
2048
|
); |
|
2049
|
} |
|
2050
|
glob_free(pGlob); |
|
2051
|
} |
|
2052
|
|
|
2053
|
/*
** Index every 'c' entry of FTSDOCS that is not yet indexed.
**
** Each such entry is added to FTSIDX with an empty title and the body
** computed by body('c',rid,NULL).  The entry is then marked as indexed,
** its name is cleared, and its label, url and mtime are filled in from
** the matching EVENT and BLOB rows.
*/
static void search_update_checkin_index(void){
  /* Step 1: copy the search text into the index */
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
    " WHERE type='c' AND NOT idxed;"
  );

  /* Step 2: flag the rows as indexed and fill in the display fields */
  db_multi_exec(
    "UPDATE ftsdocs SET idxed=1, name=NULL,"
    " (label,url,mtime) = "
    " (SELECT printf('Check-in [%%.16s] on %%s',blob.uuid,"
    " datetime(event.mtime)),"
    " printf('/timeline?y=ci&c=%%.20s',blob.uuid),"
    " event.mtime"
    " FROM event, blob"
    " WHERE event.objid=ftsdocs.rid"
    " AND blob.rid=ftsdocs.rid)"
    "WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed"
  );
}
|
2075
|
|
|
2076
|
/*
** Index every 't' entry of FTSDOCS that is not yet indexed.
**
** The title and body of each such entry are added to FTSIDX.  If that
** insert changed anything, the entries are marked as indexed, their
** name is cleared, and label, url and mtime are filled in from the
** matching TICKET row.
*/
static void search_update_ticket_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs"
    " WHERE type='t' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1, name=NULL,"
      " (label,url,mtime) ="
      " (SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,null),"
      " datetime(tkt_mtime)),"
      " printf('/tktview/%%.20s',tkt_uuid),"
      " tkt_mtime"
      " FROM ticket"
      " WHERE tkt_id=ftsdocs.rid)"
      "WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
    );
  }
}
|
2098
|
|
|
2099
|
/*
** Index every 'w' entry of FTSDOCS that is not yet indexed.
**
** The title and body of each such entry are added to FTSIDX.  If that
** insert changed anything, the entries are marked as indexed.  Their
** name is kept, the label becomes 'Wiki: ' followed by the name, the
** url points at /wiki?name=NAME and mtime is taken from TAGXREF.
*/
static void search_update_wiki_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs"
    " WHERE type='w' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1,"
      " (name,label,url,mtime) = "
      " (SELECT ftsdocs.name,"
      " 'Wiki: '||ftsdocs.name,"
      " '/wiki?name='||urlencode(ftsdocs.name),"
      " tagxref.mtime"
      " FROM tagxref WHERE tagxref.rid=ftsdocs.rid)"
      " WHERE ftsdocs.type='w' AND NOT ftsdocs.idxed"
    );
  }
}
|
2120
|
|
|
2121
|
/*
** Index every 'f' entry of FTSDOCS that is not yet indexed.
**
** The title and body of each such entry are added to FTSIDX.  If that
** insert changed anything, the entries are marked as indexed, their
** name is cleared, the label becomes 'Forum ' followed by the event
** comment, the url points at /forumpost/UUID and mtime is taken from
** the EVENT table.
*/
static void search_update_forum_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('f',rid,NULL),body('f',rid,NULL) FROM ftsdocs"
    " WHERE type='f' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1, name=NULL,"
      " (label,url,mtime) = "
      " (SELECT 'Forum '||event.comment,"
      " '/forumpost/'||blob.uuid,"
      " event.mtime"
      " FROM event, blob"
      " WHERE event.objid=ftsdocs.rid"
      " AND blob.rid=ftsdocs.rid)"
      "WHERE ftsdocs.type='f' AND NOT ftsdocs.idxed"
    );
  }
}
|
2143
|
|
|
2144
|
/*
** Index every 'e' entry of FTSDOCS that is not yet indexed.
**
** The title and body of each such entry are added to FTSIDX.  If that
** insert changed anything, the entries are marked as indexed.  Their
** name is kept, the label becomes 'Tech Note: ' followed by the name,
** the url is '/technote/' followed by the 'event-*' tag name with its
** first six characters removed, and mtime is taken from TAGXREF.
*/
static void search_update_technote_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(rowid,title,body)"
    " SELECT rowid, title('e',rid,NULL),body('e',rid,NULL) FROM ftsdocs"
    " WHERE type='e' AND NOT idxed;"
  );
  if( db_changes()!=0 ){
    db_multi_exec(
      "UPDATE ftsdocs SET idxed=1,"
      " (name,label,url,mtime) = "
      " (SELECT ftsdocs.name,"
      " 'Tech Note: '||ftsdocs.name,"
      " '/technote/'||substr(tag.tagname,7),"
      " tagxref.mtime"
      " FROM tagxref, tag USING (tagid)"
      " WHERE tagxref.rid=ftsdocs.rid"
      " AND tagname GLOB 'event-*')"
      " WHERE ftsdocs.type='e' AND NOT ftsdocs.idxed"
    );
  }
}
|
2167
|
|
|
2168
|
/* |
|
2169
|
** Deal with all of the unindexed entries in the FTSDOCS table - that |
|
2170
|
** is to say, all the entries with FTSDOCS.IDXED=0. Add them to the |
|
2171
|
** index. |
|
2172
|
*/ |
|
2173
|
void search_update_index(unsigned int srchFlags){ |
|
2174
|
if( !search_index_exists() ) return; |
|
2175
|
if( !db_exists("SELECT 1 FROM ftsdocs WHERE NOT idxed") ) return; |
|
2176
|
search_sql_setup(g.db); |
|
2177
|
db_unprotect(PROTECT_READONLY); |
|
2178
|
if( srchFlags & (SRCH_CKIN|SRCH_DOC) ){ |
|
2179
|
search_update_doc_index(); |
|
2180
|
search_update_checkin_index(); |
|
2181
|
} |
|
2182
|
if( srchFlags & SRCH_TKT ){ |
|
2183
|
search_update_ticket_index(); |
|
2184
|
} |
|
2185
|
if( srchFlags & SRCH_WIKI ){ |
|
2186
|
search_update_wiki_index(); |
|
2187
|
} |
|
2188
|
if( srchFlags & SRCH_TECHNOTE ){ |
|
2189
|
search_update_technote_index(); |
|
2190
|
} |
|
2191
|
if( srchFlags & SRCH_FORUM ){ |
|
2192
|
search_update_forum_index(); |
|
2193
|
} |
|
2194
|
db_protect_pop(); |
|
2195
|
} |
|
2196
|
|
|
2197
|
/* |
|
2198
|
** Construct, prepopulate, and then update the full-text index. |
|
2199
|
*/ |
|
2200
|
void search_rebuild_index(void){ |
|
2201
|
fossil_print("rebuilding the search index..."); |
|
2202
|
fflush(stdout); |
|
2203
|
search_create_index(); |
|
2204
|
search_fill_index(); |
|
2205
|
search_update_index(search_restrict(SRCH_ALL)); |
|
2206
|
if( db_table_exists("repository","chat") ){ |
|
2207
|
chat_rebuild_index(1); |
|
2208
|
} |
|
2209
|
fossil_print(" done\n"); |
|
2210
|
} |
|
2211
|
|
|
2212
|
/* |
|
2213
|
** COMMAND: fts-config* abbrv-subcom |
|
2214
|
** |
|
2215
|
** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? |
|
2216
|
** |
|
2217
|
** The "fossil fts-config" command configures the full-text search capabilities |
|
2218
|
** of the repository. Subcommands: |
|
2219
|
** |
|
2220
|
** reindex Rebuild the search index. This is a no-op if |
|
2221
|
** index search is disabled |
|
2222
|
** |
|
2223
|
** index (on|off) Turn the search index on or off |
|
2224
|
** |
|
2225
|
** enable TYPE .. Enable search for TYPE. TYPE is one of: |
|
2226
|
** check-in, document, ticket, wiki, technote, |
|
2227
|
** forum, help, or all |
|
2228
|
** |
|
2229
|
** disable TYPE ... Disable search for TYPE |
|
2230
|
** |
|
2231
|
** tokenizer VALUE Select a tokenizer for indexed search. VALUE |
|
2232
|
** may be one of (porter, on, off, trigram, unicode61), |
|
2233
|
** and "on" is equivalent to "porter". Unindexed |
|
2234
|
** search never uses tokenization or stemming. |
|
2235
|
** |
|
2236
|
** The current search settings are displayed after any changes are applied. |
|
2237
|
** Run this command with no arguments to simply see the settings. |
|
2238
|
*/ |
|
2239
|
void fts_config_cmd(void){ |
|
2240
|
static const struct { |
|
2241
|
int iCmd; |
|
2242
|
const char *z; |
|
2243
|
} aCmd[] = { |
|
2244
|
{ 1, "reindex" }, |
|
2245
|
{ 2, "index" }, |
|
2246
|
{ 3, "disable" }, |
|
2247
|
{ 4, "enable" }, |
|
2248
|
{ 5, "tokenizer"}, |
|
2249
|
}; |
|
2250
|
static const struct { |
|
2251
|
const char *zSetting; |
|
2252
|
const char *zName; |
|
2253
|
const char *zSw; |
|
2254
|
} aSetng[] = { |
|
2255
|
{ "search-ci", "check-in search:", "c" }, |
|
2256
|
{ "search-doc", "document search:", "d" }, |
|
2257
|
{ "search-tkt", "ticket search:", "t" }, |
|
2258
|
{ "search-wiki", "wiki search:", "w" }, |
|
2259
|
{ "search-technote", "technote search:", "e" }, |
|
2260
|
{ "search-forum", "forum search:", "f" }, |
|
2261
|
{ "search-help", "built-in help search:", "h" }, |
|
2262
|
}; |
|
2263
|
char *zSubCmd = 0; |
|
2264
|
int i, j, n; |
|
2265
|
int iCmd = 0; |
|
2266
|
int iAction = 0; |
|
2267
|
db_find_and_open_repository(0, 0); |
|
2268
|
if( g.argc>2 ){ |
|
2269
|
zSubCmd = g.argv[2]; |
|
2270
|
n = (int)strlen(zSubCmd); |
|
2271
|
for(i=0; i<count(aCmd); i++){ |
|
2272
|
if( fossil_strncmp(aCmd[i].z, zSubCmd, n)==0 ) break; |
|
2273
|
} |
|
2274
|
if( i>=count(aCmd) ){ |
|
2275
|
Blob all; |
|
2276
|
blob_init(&all,0,0); |
|
2277
|
for(i=0; i<count(aCmd); i++) blob_appendf(&all, " %s", aCmd[i].z); |
|
2278
|
fossil_fatal("unknown \"%s\" - should be one of:%s", |
|
2279
|
zSubCmd, blob_str(&all)); |
|
2280
|
return; |
|
2281
|
} |
|
2282
|
iCmd = aCmd[i].iCmd; |
|
2283
|
} |
|
2284
|
g.perm.Read = 1; |
|
2285
|
g.perm.RdTkt = 1; |
|
2286
|
g.perm.RdWiki = 1; |
|
2287
|
if( iCmd==1 ){ |
|
2288
|
if( search_index_exists() ) iAction = 2; |
|
2289
|
} |
|
2290
|
if( iCmd==2 ){ |
|
2291
|
if( g.argc<3 ) usage("index (on|off)"); |
|
2292
|
iAction = 1 + is_truth(g.argv[3]); |
|
2293
|
} |
|
2294
|
db_begin_transaction(); |
|
2295
|
|
|
2296
|
/* Adjust search settings */ |
|
2297
|
if( iCmd==3 || iCmd==4 ){ |
|
2298
|
int k; |
|
2299
|
const char *zCtrl; |
|
2300
|
for(k=2; k<g.argc; k++){ |
|
2301
|
if( k==2 ){ |
|
2302
|
if( g.argc<4 ){ |
|
2303
|
zCtrl = "all"; |
|
2304
|
}else{ |
|
2305
|
zCtrl = g.argv[3]; |
|
2306
|
k++; |
|
2307
|
} |
|
2308
|
}else{ |
|
2309
|
zCtrl = g.argv[k]; |
|
2310
|
} |
|
2311
|
if( fossil_strcmp(zCtrl,"all")==0 ){ |
|
2312
|
zCtrl = "cdtwefh"; |
|
2313
|
} |
|
2314
|
if( strlen(zCtrl)>=4 ){ |
|
2315
|
/* If the argument to "enable" or "disable" is a string of at least |
|
2316
|
** 4 characters which matches part of any aSetng.zName, then use that |
|
2317
|
** one aSetng value only. */ |
|
2318
|
char *zGlob = mprintf("*%s*", zCtrl); |
|
2319
|
for(j=0; j<count(aSetng); j++){ |
|
2320
|
if( sqlite3_strglob(zGlob, aSetng[j].zName)==0 ){ |
|
2321
|
db_set_int(aSetng[j].zSetting/*works-like:"x"*/, iCmd-3, 0); |
|
2322
|
zCtrl = 0; |
|
2323
|
break; |
|
2324
|
} |
|
2325
|
} |
|
2326
|
fossil_free(zGlob); |
|
2327
|
} |
|
2328
|
if( zCtrl ){ |
|
2329
|
for(j=0; j<count(aSetng); j++){ |
|
2330
|
if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){ |
|
2331
|
db_set_int(aSetng[j].zSetting/*works-like:"x"*/, iCmd-3, 0); |
|
2332
|
} |
|
2333
|
} |
|
2334
|
} |
|
2335
|
} |
|
2336
|
}else if( iCmd==5 ){ |
|
2337
|
int iOldTokenizer, iNewTokenizer; |
|
2338
|
if( g.argc<4 ) usage("tokenizer porter|on|off|trigram|unicode61"); |
|
2339
|
iOldTokenizer = search_tokenizer_type(0); |
|
2340
|
db_set("search-tokenizer", |
|
2341
|
search_tokenizer_for_string(g.argv[3]), 0); |
|
2342
|
iNewTokenizer = search_tokenizer_type(1); |
|
2343
|
if( iOldTokenizer!=iNewTokenizer ){ |
|
2344
|
/* Drop or rebuild index if tokenizer changes. */ |
|
2345
|
iAction = 1 + ((iOldTokenizer && iNewTokenizer) |
|
2346
|
? 1 : (iNewTokenizer ? 1 : 0)); |
|
2347
|
} |
|
2348
|
} |
|
2349
|
|
|
2350
|
/* destroy or rebuild the index, if requested */ |
|
2351
|
if( iAction>=1 ){ |
|
2352
|
search_drop_index(); |
|
2353
|
} |
|
2354
|
if( iAction>=2 ){ |
|
2355
|
search_rebuild_index(); |
|
2356
|
} |
|
2357
|
|
|
2358
|
/* Always show the status before ending */ |
|
2359
|
for(i=0; i<count(aSetng); i++){ |
|
2360
|
fossil_print("%-21s %s\n", aSetng[i].zName, |
|
2361
|
db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off"); |
|
2362
|
} |
|
2363
|
fossil_print("%-21s %s\n", "tokenizer:", |
|
2364
|
search_tokenizer_for_string(0)); |
|
2365
|
if( search_index_exists() ){ |
|
2366
|
int pgsz = db_int64(0, "PRAGMA repository.page_size;"); |
|
2367
|
i64 nTotal = db_int64(0, "PRAGMA repository.page_count;")*pgsz; |
|
2368
|
i64 nFts = db_int64(0, "SELECT count(*) FROM dbstat" |
|
2369
|
" WHERE schema='repository'" |
|
2370
|
" AND name LIKE 'fts%%'")*pgsz; |
|
2371
|
char zSize[50]; |
|
2372
|
fossil_print("%-21s FTS%d\n", "full-text index:", search_index_type(1)); |
|
2373
|
fossil_print("%-21s %d\n", "documents:", |
|
2374
|
db_int(0, "SELECT count(*) FROM ftsdocs")); |
|
2375
|
approxSizeName(sizeof(zSize), zSize, nFts); |
|
2376
|
fossil_print("%-21s %s (%.1f%% of repository)\n", "space used", |
|
2377
|
zSize, 100.0*((double)nFts/(double)nTotal)); |
|
2378
|
}else{ |
|
2379
|
fossil_print("%-21s disabled\n", "full-text index:"); |
|
2380
|
} |
|
2381
|
db_end_transaction(0); |
|
2382
|
} |
|
2383
|
|
|
2384
|
/* |
|
2385
|
** WEBPAGE: test-ftsdocs |
|
2386
|
** |
|
2387
|
** Show a table of all documents currently in the search index. |
|
2388
|
*/ |
|
2389
|
void search_data_page(void){ |
|
2390
|
Stmt q; |
|
2391
|
const char *zId = P("id"); |
|
2392
|
const char *zType = P("y"); |
|
2393
|
const char *zIdxed = P("ixed"); |
|
2394
|
int id; |
|
2395
|
int cnt1 = 0, cnt2 = 0, cnt3 = 0; |
|
2396
|
login_check_credentials(); |
|
2397
|
if( !g.perm.Admin ){ login_needed(0); return; } |
|
2398
|
style_set_current_feature("test"); |
|
2399
|
if( !search_index_exists() ){ |
|
2400
|
@ <p>Indexed search is disabled |
|
2401
|
style_finish_page(); |
|
2402
|
return; |
|
2403
|
} |
|
2404
|
search_sql_setup(g.db); |
|
2405
|
style_submenu_element("Setup","%R/srchsetup"); |
|
2406
|
if( zId!=0 && (id = atoi(zId))>0 ){ |
|
2407
|
/* Show information about a single ftsdocs entry */ |
|
2408
|
style_header("Information about ftsdoc entry %d", id); |
|
2409
|
style_submenu_element("Summary","%R/test-ftsdocs"); |
|
2410
|
db_prepare(&q, |
|
2411
|
"SELECT type||rid, name, idxed, label, url, datetime(mtime)" |
|
2412
|
" FROM ftsdocs WHERE rowid=%d", id |
|
2413
|
); |
|
2414
|
if( db_step(&q)==SQLITE_ROW ){ |
|
2415
|
const char *zUrl = db_column_text(&q,4); |
|
2416
|
const char *zDocId = db_column_text(&q,0); |
|
2417
|
char *zName; |
|
2418
|
char *z; |
|
2419
|
@ <table border=0> |
|
2420
|
@ <tr><td align='right'>rowid:<td> <td>%d(id) |
|
2421
|
@ <tr><td align='right'>id:<td><td>%s(zDocId) |
|
2422
|
@ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1)) |
|
2423
|
@ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2)) |
|
2424
|
@ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3)) |
|
2425
|
@ <tr><td align='right'>url:<td><td> |
|
2426
|
@ <a href='%R%s(zUrl)'>%h(zUrl)</a> |
|
2427
|
@ <tr><td align='right'>mtime:<td><td>%s(db_column_text(&q,5)) |
|
2428
|
z = db_text(0, "SELECT title FROM ftsidx WHERE rowid=%d",id); |
|
2429
|
if( z && z[0] ){ |
|
2430
|
@ <tr><td align="right">title:<td><td>%h(z) |
|
2431
|
fossil_free(z); |
|
2432
|
} |
|
2433
|
z = db_text(0, "SELECT body FROM ftsidx WHERE rowid=%d",id); |
|
2434
|
if( z && z[0] ){ |
|
2435
|
@ <tr><td align="right" valign="top">body:<td><td>%h(z) |
|
2436
|
fossil_free(z); |
|
2437
|
} |
|
2438
|
@ </table> |
|
2439
|
zName = mprintf("Indexed '%c' docs",zDocId[0]); |
|
2440
|
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=1",zDocId[0]); |
|
2441
|
zName = mprintf("Unindexed '%c' docs",zDocId[0]); |
|
2442
|
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=0",zDocId[0]); |
|
2443
|
} |
|
2444
|
db_finalize(&q); |
|
2445
|
style_finish_page(); |
|
2446
|
return; |
|
2447
|
} |
|
2448
|
if( zType!=0 && zType[0]!=0 && zType[1]==0 && |
|
2449
|
zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0 |
|
2450
|
){ |
|
2451
|
int ixed = zIdxed[0]=='1'; |
|
2452
|
char *zName; |
|
2453
|
style_header("List of '%c' documents that are%s indexed", |
|
2454
|
zType[0], ixed ? "" : " not"); |
|
2455
|
style_submenu_element("Summary","%R/test-ftsdocs"); |
|
2456
|
if( ixed==0 ){ |
|
2457
|
zName = mprintf("Indexed '%c' docs",zType[0]); |
|
2458
|
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=1",zType[0]); |
|
2459
|
}else{ |
|
2460
|
zName = mprintf("Unindexed '%c' docs",zType[0]); |
|
2461
|
style_submenu_element(zName,"%R/test-ftsdocs?y=%c&ixed=0",zType[0]); |
|
2462
|
} |
|
2463
|
db_prepare(&q, |
|
2464
|
"SELECT rowid, type||rid ||' '|| coalesce(label,'')" |
|
2465
|
" FROM ftsdocs WHERE type='%c' AND %s idxed", |
|
2466
|
zType[0], ixed ? "" : "NOT" |
|
2467
|
); |
|
2468
|
@ <ul> |
|
2469
|
while( db_step(&q)==SQLITE_ROW ){ |
|
2470
|
@ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'> |
|
2471
|
@ %h(db_column_text(&q,1))</a> |
|
2472
|
} |
|
2473
|
@ </ul> |
|
2474
|
db_finalize(&q); |
|
2475
|
style_finish_page(); |
|
2476
|
return; |
|
2477
|
} |
|
2478
|
style_header("Summary of ftsdocs"); |
|
2479
|
db_prepare(&q, |
|
2480
|
"SELECT type, sum(idxed IS TRUE), sum(idxed IS FALSE), count(*)" |
|
2481
|
" FROM ftsdocs" |
|
2482
|
" GROUP BY 1 ORDER BY 4 DESC" |
|
2483
|
); |
|
2484
|
@ <table border=1 cellpadding=3 cellspacing=0> |
|
2485
|
@ <thead> |
|
2486
|
@ <tr><th>Type<th>Indexed<th>Unindexed<th>Total |
|
2487
|
@ </thead> |
|
2488
|
@ <tbody> |
|
2489
|
while( db_step(&q)==SQLITE_ROW ){ |
|
2490
|
const char *zType = db_column_text(&q,0); |
|
2491
|
int nIndexed = db_column_int(&q, 1); |
|
2492
|
int nUnindexed = db_column_int(&q, 2); |
|
2493
|
int nTotal = db_column_int(&q, 3); |
|
2494
|
@ <tr><td>%h(zType) |
|
2495
|
if( nIndexed>0 ){ |
|
2496
|
@ <td align="right"><a href='%R/test-ftsdocs?y=%s(zType)&ixed=1'>\ |
|
2497
|
@ %d(nIndexed)</a> |
|
2498
|
}else{ |
|
2499
|
@ <td align="right">0 |
|
2500
|
} |
|
2501
|
if( nUnindexed>0 ){ |
|
2502
|
@ <td align="right"><a href='%R/test-ftsdocs?y=%s(zType)&ixed=0'>\ |
|
2503
|
@ %d(nUnindexed)</a> |
|
2504
|
}else{ |
|
2505
|
@ <td align="right">0 |
|
2506
|
} |
|
2507
|
@ <td align="right">%d(nTotal) |
|
2508
|
@ </tr> |
|
2509
|
cnt1 += nIndexed; |
|
2510
|
cnt2 += nUnindexed; |
|
2511
|
cnt3 += nTotal; |
|
2512
|
} |
|
2513
|
db_finalize(&q); |
|
2514
|
@ </tbody><tfooter> |
|
2515
|
@ <tr><th>Total<th align="right">%d(cnt1)<th align="right">%d(cnt2) |
|
2516
|
@ <th align="right">%d(cnt3) |
|
2517
|
@ </tfooter> |
|
2518
|
@ </table> |
|
2519
|
style_finish_page(); |
|
2520
|
} |
|
2521
|
|
|
2522
|
|
|
2523
|
/* |
|
2524
|
** The Fts5MatchinfoCtx bits were all taken verbatim from: |
|
2525
|
** |
|
2526
|
** https://sqlite.org/src/finfo?name=ext/fts5/fts5_test_mi.c |
|
2527
|
*/ |
|
2528
|
|
|
2529
|
typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx; |
|
2530
|
|
|
2531
|
#if INTERFACE |
|
2532
|
#ifndef SQLITE_AMALGAMATION |
|
2533
|
typedef unsigned int u32; |
|
2534
|
#endif |
|
2535
|
#endif |
|
2536
|
|
|
2537
|
struct Fts5MatchinfoCtx { |
|
2538
|
int nCol; /* Number of cols in FTS5 table */ |
|
2539
|
int nPhrase; /* Number of phrases in FTS5 query */ |
|
2540
|
char *zArg; /* nul-term'd copy of 2nd arg */ |
|
2541
|
int nRet; /* Number of elements in aRet[] */ |
|
2542
|
u32 *aRet; /* Array of 32-bit unsigned ints to return */ |
|
2543
|
}; |
|
2544
|
|
|
2545
|
|
|
2546
|
/* |
|
2547
|
** Return a pointer to the fts5_api pointer for database connection db. |
|
2548
|
** If an error occurs, return NULL and leave an error in the database |
|
2549
|
** handle (accessible using sqlite3_errcode()/errmsg()). |
|
2550
|
*/ |
|
2551
|
static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){ |
|
2552
|
sqlite3_stmt *pStmt = 0; |
|
2553
|
int rc; |
|
2554
|
|
|
2555
|
*ppApi = 0; |
|
2556
|
rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0); |
|
2557
|
if( rc==SQLITE_OK ){ |
|
2558
|
sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0); |
|
2559
|
(void)sqlite3_step(pStmt); |
|
2560
|
rc = sqlite3_finalize(pStmt); |
|
2561
|
} |
|
2562
|
|
|
2563
|
return rc; |
|
2564
|
} |
|
2565
|
|
|
2566
|
|
|
2567
|
/*
** Report how many 32-bit integers the matchinfo() flag character f
** contributes to the output array for a table with nCol columns and a
** query with nPhrase phrases.
**
** Returns -1 if f is not one of the recognized flag characters
** ('p', 'c', 'n', 'a', 'l', 's', 'x', 'y', 'b').
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  switch( f ){
    /* Flags that produce a single integer */
    case 'p':
    case 'c':
    case 'n':
      return 1;

    /* Flags that produce one integer per column */
    case 'a':
    case 'l':
    case 's':
      return nCol;

    /* Three integers per (phrase, column) pair */
    case 'x':
      return 3 * nCol * nPhrase;

    /* One integer per (phrase, column) pair */
    case 'y':
      return nCol * nPhrase;

    /* One bit per column, rounded up to whole 32-bit words, per phrase */
    case 'b':
      return ((nCol + 31) / 32) * nPhrase;

    default:
      return -1;
  }
}
|
2589
|
|
|
2590
|
static int fts5MatchinfoIter( |
|
2591
|
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
|
2592
|
Fts5Context *pFts, /* First arg to pass to pApi functions */ |
|
2593
|
Fts5MatchinfoCtx *p, |
|
2594
|
int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*) |
|
2595
|
){ |
|
2596
|
int i; |
|
2597
|
int n = 0; |
|
2598
|
int rc = SQLITE_OK; |
|
2599
|
char f; |
|
2600
|
for(i=0; (f = p->zArg[i]); i++){ |
|
2601
|
rc = x(pApi, pFts, p, f, &p->aRet[n]); |
|
2602
|
if( rc!=SQLITE_OK ) break; |
|
2603
|
n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f); |
|
2604
|
} |
|
2605
|
return rc; |
|
2606
|
} |
|
2607
|
|
|
2608
|
static int fts5MatchinfoXCb( |
|
2609
|
const Fts5ExtensionApi *pApi, |
|
2610
|
Fts5Context *pFts, |
|
2611
|
void *pUserData |
|
2612
|
){ |
|
2613
|
Fts5PhraseIter iter; |
|
2614
|
int iCol, iOff; |
|
2615
|
u32 *aOut = (u32*)pUserData; |
|
2616
|
int iPrev = -1; |
|
2617
|
|
|
2618
|
for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); |
|
2619
|
iCol>=0; |
|
2620
|
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) |
|
2621
|
){ |
|
2622
|
aOut[iCol*3+1]++; |
|
2623
|
if( iCol!=iPrev ) aOut[iCol*3 + 2]++; |
|
2624
|
iPrev = iCol; |
|
2625
|
} |
|
2626
|
|
|
2627
|
return SQLITE_OK; |
|
2628
|
} |
|
2629
|
|
|
2630
|
static int fts5MatchinfoGlobalCb( |
|
2631
|
const Fts5ExtensionApi *pApi, |
|
2632
|
Fts5Context *pFts, |
|
2633
|
Fts5MatchinfoCtx *p, |
|
2634
|
char f, |
|
2635
|
u32 *aOut |
|
2636
|
){ |
|
2637
|
int rc = SQLITE_OK; |
|
2638
|
switch( f ){ |
|
2639
|
case 'p': |
|
2640
|
aOut[0] = p->nPhrase; |
|
2641
|
break; |
|
2642
|
|
|
2643
|
case 'c': |
|
2644
|
aOut[0] = p->nCol; |
|
2645
|
break; |
|
2646
|
|
|
2647
|
case 'x': { |
|
2648
|
int i; |
|
2649
|
for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){ |
|
2650
|
void *pPtr = (void*)&aOut[i * p->nCol * 3]; |
|
2651
|
rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb); |
|
2652
|
} |
|
2653
|
break; |
|
2654
|
} |
|
2655
|
|
|
2656
|
case 'n': { |
|
2657
|
sqlite3_int64 nRow; |
|
2658
|
rc = pApi->xRowCount(pFts, &nRow); |
|
2659
|
aOut[0] = (u32)nRow; |
|
2660
|
break; |
|
2661
|
} |
|
2662
|
|
|
2663
|
case 'a': { |
|
2664
|
sqlite3_int64 nRow = 0; |
|
2665
|
rc = pApi->xRowCount(pFts, &nRow); |
|
2666
|
if( nRow==0 ){ |
|
2667
|
memset(aOut, 0, sizeof(u32) * p->nCol); |
|
2668
|
}else{ |
|
2669
|
int i; |
|
2670
|
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){ |
|
2671
|
sqlite3_int64 nToken; |
|
2672
|
rc = pApi->xColumnTotalSize(pFts, i, &nToken); |
|
2673
|
if( rc==SQLITE_OK){ |
|
2674
|
aOut[i] = (u32)((2*nToken + nRow) / (2*nRow)); |
|
2675
|
} |
|
2676
|
} |
|
2677
|
} |
|
2678
|
break; |
|
2679
|
} |
|
2680
|
|
|
2681
|
} |
|
2682
|
return rc; |
|
2683
|
} |
|
2684
|
|
|
2685
|
static int fts5MatchinfoLocalCb( |
|
2686
|
const Fts5ExtensionApi *pApi, |
|
2687
|
Fts5Context *pFts, |
|
2688
|
Fts5MatchinfoCtx *p, |
|
2689
|
char f, |
|
2690
|
u32 *aOut |
|
2691
|
){ |
|
2692
|
int i; |
|
2693
|
int rc = SQLITE_OK; |
|
2694
|
|
|
2695
|
switch( f ){ |
|
2696
|
case 'b': { |
|
2697
|
int iPhrase; |
|
2698
|
int nInt = ((p->nCol + 31) / 32) * p->nPhrase; |
|
2699
|
for(i=0; i<nInt; i++) aOut[i] = 0; |
|
2700
|
|
|
2701
|
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){ |
|
2702
|
Fts5PhraseIter iter; |
|
2703
|
int iCol; |
|
2704
|
for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); |
|
2705
|
iCol>=0; |
|
2706
|
pApi->xPhraseNextColumn(pFts, &iter, &iCol) |
|
2707
|
){ |
|
2708
|
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32); |
|
2709
|
} |
|
2710
|
} |
|
2711
|
|
|
2712
|
break; |
|
2713
|
} |
|
2714
|
|
|
2715
|
case 'x': |
|
2716
|
case 'y': { |
|
2717
|
int nMul = (f=='x' ? 3 : 1); |
|
2718
|
int iPhrase; |
|
2719
|
|
|
2720
|
for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0; |
|
2721
|
|
|
2722
|
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){ |
|
2723
|
Fts5PhraseIter iter; |
|
2724
|
int iOff, iCol; |
|
2725
|
for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); |
|
2726
|
iOff>=0; |
|
2727
|
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) |
|
2728
|
){ |
|
2729
|
aOut[nMul * (iCol + iPhrase * p->nCol)]++; |
|
2730
|
} |
|
2731
|
} |
|
2732
|
|
|
2733
|
break; |
|
2734
|
} |
|
2735
|
|
|
2736
|
case 'l': { |
|
2737
|
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){ |
|
2738
|
int nToken; |
|
2739
|
rc = pApi->xColumnSize(pFts, i, &nToken); |
|
2740
|
aOut[i] = (u32)nToken; |
|
2741
|
} |
|
2742
|
break; |
|
2743
|
} |
|
2744
|
|
|
2745
|
case 's': { |
|
2746
|
int nInst; |
|
2747
|
|
|
2748
|
memset(aOut, 0, sizeof(u32) * p->nCol); |
|
2749
|
|
|
2750
|
rc = pApi->xInstCount(pFts, &nInst); |
|
2751
|
for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
|
2752
|
int iPhrase, iOff, iCol = 0; |
|
2753
|
int iNextPhrase; |
|
2754
|
int iNextOff; |
|
2755
|
u32 nSeq = 1; |
|
2756
|
int j; |
|
2757
|
|
|
2758
|
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff); |
|
2759
|
iNextPhrase = iPhrase+1; |
|
2760
|
iNextOff = iOff+pApi->xPhraseSize(pFts, 0); |
|
2761
|
for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ |
|
2762
|
int ip, ic, io; |
|
2763
|
rc = pApi->xInst(pFts, j, &ip, &ic, &io); |
|
2764
|
if( ic!=iCol || io>iNextOff ) break; |
|
2765
|
if( ip==iNextPhrase && io==iNextOff ){ |
|
2766
|
nSeq++; |
|
2767
|
iNextPhrase = ip+1; |
|
2768
|
iNextOff = io + pApi->xPhraseSize(pFts, ip); |
|
2769
|
} |
|
2770
|
} |
|
2771
|
|
|
2772
|
if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq; |
|
2773
|
} |
|
2774
|
|
|
2775
|
break; |
|
2776
|
} |
|
2777
|
} |
|
2778
|
return rc; |
|
2779
|
} |
|
2780
|
|
|
2781
|
static Fts5MatchinfoCtx *fts5MatchinfoNew( |
|
2782
|
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
|
2783
|
Fts5Context *pFts, /* First arg to pass to pApi functions */ |
|
2784
|
sqlite3_context *pCtx, /* Context for returning error message */ |
|
2785
|
const char *zArg /* Matchinfo flag string */ |
|
2786
|
){ |
|
2787
|
Fts5MatchinfoCtx *p; |
|
2788
|
int nCol; |
|
2789
|
int nPhrase; |
|
2790
|
int i; |
|
2791
|
int nInt; |
|
2792
|
sqlite3_int64 nByte; |
|
2793
|
int rc; |
|
2794
|
|
|
2795
|
nCol = pApi->xColumnCount(pFts); |
|
2796
|
nPhrase = pApi->xPhraseCount(pFts); |
|
2797
|
|
|
2798
|
nInt = 0; |
|
2799
|
for(i=0; zArg[i]; i++){ |
|
2800
|
int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]); |
|
2801
|
if( n<0 ){ |
|
2802
|
char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]); |
|
2803
|
sqlite3_result_error(pCtx, zErr, -1); |
|
2804
|
sqlite3_free(zErr); |
|
2805
|
return 0; |
|
2806
|
} |
|
2807
|
nInt += n; |
|
2808
|
} |
|
2809
|
|
|
2810
|
nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */ |
|
2811
|
+ sizeof(u32) * nInt /* The p->aRet[] array */ |
|
2812
|
+ (i+1); /* The p->zArg string */ |
|
2813
|
p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte); |
|
2814
|
if( p==0 ){ |
|
2815
|
sqlite3_result_error_nomem(pCtx); |
|
2816
|
return 0; |
|
2817
|
} |
|
2818
|
memset(p, 0, nByte); |
|
2819
|
|
|
2820
|
p->nCol = nCol; |
|
2821
|
p->nPhrase = nPhrase; |
|
2822
|
p->aRet = (u32*)&p[1]; |
|
2823
|
p->nRet = nInt; |
|
2824
|
p->zArg = (char*)&p->aRet[nInt]; |
|
2825
|
memcpy(p->zArg, zArg, i); |
|
2826
|
|
|
2827
|
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb); |
|
2828
|
if( rc!=SQLITE_OK ){ |
|
2829
|
sqlite3_result_error_code(pCtx, rc); |
|
2830
|
sqlite3_free(p); |
|
2831
|
p = 0; |
|
2832
|
} |
|
2833
|
|
|
2834
|
return p; |
|
2835
|
} |
|
2836
|
|
|
2837
|
static void fts5MatchinfoFunc( |
|
2838
|
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
|
2839
|
Fts5Context *pFts, /* First arg to pass to pApi functions */ |
|
2840
|
sqlite3_context *pCtx, /* Context for returning result/error */ |
|
2841
|
int nVal, |