|
1
|
/* |
|
2
|
** Copyright (c) 2007 D. Richard Hipp |
|
3
|
** |
|
4
|
** This program is free software; you can redistribute it and/or |
|
5
|
** modify it under the terms of the Simplified BSD License (also |
|
6
|
** known as the "2-Clause License" or "FreeBSD License".) |
|
7
|
|
|
8
|
** This program is distributed in the hope that it will be useful, |
|
9
|
** but without any warranty; without even the implied warranty of |
|
10
|
** merchantability or fitness for a particular purpose. |
|
11
|
** |
|
12
|
** Author contact information: |
|
13
|
** [email protected] |
|
14
|
** http://www.hwaci.com/drh/ |
|
15
|
** |
|
16
|
******************************************************************************* |
|
17
|
** |
|
18
|
** This file contains code to implement string comparisons using a |
|
19
|
** variety of algorithm. The comparison algorithm can be any of: |
|
20
|
** |
|
21
|
** MS_EXACT The string must exactly match the pattern. |
|
22
|
** |
|
23
|
** MS_BRLIST The pattern is a space- and/or comma-separated |
|
24
|
** list of strings, any one of which may match |
|
25
|
** the input string. |
|
26
|
** |
|
27
|
** MS_GLOB Like BRLIST, except each component of the pattern |
|
28
|
** is a GLOB expression. |
|
29
|
** |
|
30
|
** MS_LIKE Like BRLIST, except each component of the pattern |
|
31
|
** is an SQL LIKE expression. |
|
32
|
** |
|
33
|
** MS_REGEXP Like BRLIST, except each component of the pattern |
|
34
|
** is a regular expression. |
|
35
|
** |
|
36
|
*/ |
|
37
|
#include "config.h" |
|
38
|
#include <string.h> |
|
39
|
#include "match.h" |
|
40
|
|
|
41
|
#if INTERFACE |
|
42
|
/* |
|
43
|
** Types of comparisons that we are able to perform: |
|
44
|
*/ |
|
45
|
typedef enum { |
|
46
|
MS_EXACT=1, /* Exact string comparison */ |
|
47
|
MS_GLOB=2, /* Matches against a list of GLOB patterns. */ |
|
48
|
MS_LIKE=3, /* Matches against a list of LIKE patterns. */ |
|
49
|
MS_REGEXP=4, /* Matches against a list of regular expressions. */ |
|
50
|
MS_BRLIST=5, /* Matches any element of a list */ |
|
51
|
} MatchStyle; |
|
52
|
|
|
53
|
/* |
|
54
|
** The following object represents a precompiled pattern to use for |
|
55
|
** string matching. |
|
56
|
** |
|
57
|
** * Create an instance of this object using match_create(). |
|
58
|
** * Do comparisons using match_text(). |
|
59
|
** * Destroy using match_free() when you are done. |
|
60
|
** |
|
61
|
*/ |
|
62
|
struct Matcher { |
|
63
|
MatchStyle style; /* Which algorithm to use */ |
|
64
|
int nPattern; /* How many patterns are their */ |
|
65
|
char **azPattern; /* List of patterns */ |
|
66
|
ReCompiled **aRe; /* List of compiled regular expressions */ |
|
67
|
}; |
|
68
|
|
|
69
|
#endif /*INTERFACE*/ |
|
70
|
|
|
71
|
/* |
|
72
|
** Translate a "match style" text name into the MS_* enum value. |
|
73
|
** Return eDflt if no match is found. |
|
74
|
*/ |
|
75
|
MatchStyle match_style(const char *zStyle, MatchStyle eDflt){ |
|
76
|
if( zStyle==0 ) return eDflt; |
|
77
|
if( fossil_stricmp(zStyle, "brlist")==0 ) return MS_BRLIST; |
|
78
|
if( fossil_stricmp(zStyle, "list")==0 ) return MS_BRLIST; |
|
79
|
if( fossil_stricmp(zStyle, "regexp")==0 ) return MS_REGEXP; |
|
80
|
if( fossil_stricmp(zStyle, "re")==0 ) return MS_REGEXP; |
|
81
|
if( fossil_stricmp(zStyle, "glob")==0 ) return MS_GLOB; |
|
82
|
if( fossil_stricmp(zStyle, "like")==0 ) return MS_LIKE; |
|
83
|
if( fossil_stricmp(zStyle, "exact")==0 ) return MS_EXACT; |
|
84
|
return eDflt; |
|
85
|
} |
|
86
|
|
|
87
|
|
|
88
|
/* |
|
89
|
** Create a new Matcher object using the pattern provided. |
|
90
|
*/ |
|
91
|
Matcher *match_create(MatchStyle style, const char *zPat){ |
|
92
|
char cDel; /* Delimiter character */ |
|
93
|
int i; /* Loop counter */ |
|
94
|
Matcher *p; /* The new Matcher to be constructed */ |
|
95
|
char *zOne; /* One element of the pattern */ |
|
96
|
|
|
97
|
if( zPat==0 ) return 0; |
|
98
|
p = fossil_malloc( sizeof(*p) ); |
|
99
|
memset(p, 0, sizeof(*p)); |
|
100
|
p->style = style; |
|
101
|
|
|
102
|
if( style==MS_EXACT ){ |
|
103
|
p->nPattern = 1; |
|
104
|
p->azPattern = fossil_malloc( sizeof(p->azPattern[0]) ); |
|
105
|
p->azPattern[0] = fossil_strdup(zPat); |
|
106
|
return p; |
|
107
|
} |
|
108
|
|
|
109
|
while( 1 ){ |
|
110
|
/* Skip leading delimiters. */ |
|
111
|
for( ; fossil_isspace(*zPat) || *zPat==','; ++zPat ); |
|
112
|
|
|
113
|
/* Next non-delimiter character determines quoting. */ |
|
114
|
if( zPat[0]==0 ){ |
|
115
|
/* Terminate loop at end of string. */ |
|
116
|
break; |
|
117
|
}else if( zPat[0]=='\'' || zPat[0]=='"' ){ |
|
118
|
/* If word is quoted, prepare to stop at end quote. */ |
|
119
|
cDel = zPat[0]; |
|
120
|
++zPat; |
|
121
|
}else{ |
|
122
|
/* If word is not quoted, prepare to stop at delimiter. */ |
|
123
|
cDel = ','; |
|
124
|
} |
|
125
|
|
|
126
|
/* Find the next delimiter character or end of string. */ |
|
127
|
for( i=0; zPat[i] && zPat[i]!=cDel; ++i ){ |
|
128
|
/* If delimiter is comma, also recognize spaces as delimiters. */ |
|
129
|
if( cDel==',' && fossil_isspace(zPat[i]) ){ |
|
130
|
break; |
|
131
|
} |
|
132
|
|
|
133
|
/* In regexp mode, ignore delimiters following backslashes. */ |
|
134
|
if( style==MS_REGEXP && zPat[i]=='\\' && zPat[i+1] ){ |
|
135
|
++i; |
|
136
|
} |
|
137
|
} |
|
138
|
|
|
139
|
/* zOne is a zero-terminated copy of the pattern, without delimiters */ |
|
140
|
zOne = fossil_strndup(zPat, i); |
|
141
|
zPat += i; |
|
142
|
if( zPat[0] ) zPat++; |
|
143
|
|
|
144
|
/* Check for regular expression syntax errors. */ |
|
145
|
if( style==MS_REGEXP ){ |
|
146
|
ReCompiled *regexp; |
|
147
|
const char *zFail = fossil_re_compile(®exp, zOne, 0); |
|
148
|
if( zFail ){ |
|
149
|
re_free(regexp); |
|
150
|
continue; |
|
151
|
} |
|
152
|
p->nPattern++; |
|
153
|
p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)*p->nPattern); |
|
154
|
p->aRe[p->nPattern-1] = regexp; |
|
155
|
fossil_free(zOne); |
|
156
|
}else{ |
|
157
|
p->nPattern++; |
|
158
|
p->azPattern = fossil_realloc(p->azPattern, sizeof(char*)*p->nPattern); |
|
159
|
p->azPattern[p->nPattern-1] = zOne; |
|
160
|
} |
|
161
|
} |
|
162
|
return p; |
|
163
|
} |
|
164
|
|
|
165
|
/* |
|
166
|
** Return non-zero (true) if the input string matches the pattern |
|
167
|
** described by the matcher. |
|
168
|
** |
|
169
|
** The return value is really the 1-based index of the particular |
|
170
|
** pattern that matched. |
|
171
|
*/ |
|
172
|
int match_text(Matcher *p, const char *zText){ |
|
173
|
int i; |
|
174
|
if( p==0 ){ |
|
175
|
return zText==0; |
|
176
|
} |
|
177
|
switch( p->style ){ |
|
178
|
case MS_BRLIST: |
|
179
|
case MS_EXACT: { |
|
180
|
for(i=0; i<p->nPattern; i++){ |
|
181
|
if( strcmp(p->azPattern[i], zText)==0 ) return i+1; |
|
182
|
} |
|
183
|
break; |
|
184
|
} |
|
185
|
case MS_GLOB: { |
|
186
|
for(i=0; i<p->nPattern; i++){ |
|
187
|
if( sqlite3_strglob(p->azPattern[i], zText)==0 ) return i+1; |
|
188
|
} |
|
189
|
break; |
|
190
|
} |
|
191
|
case MS_LIKE: { |
|
192
|
for(i=0; i<p->nPattern; i++){ |
|
193
|
if( sqlite3_strlike(p->azPattern[i], zText, 0)==0 ) return i+1; |
|
194
|
} |
|
195
|
break; |
|
196
|
} |
|
197
|
case MS_REGEXP: { |
|
198
|
int nText = (int)strlen(zText); |
|
199
|
for(i=0; i<p->nPattern; i++){ |
|
200
|
if( re_match(p->aRe[i], (const u8*)zText, nText) ) return i+1; |
|
201
|
} |
|
202
|
break; |
|
203
|
} |
|
204
|
} |
|
205
|
return 0; |
|
206
|
} |
|
207
|
|
|
208
|
|
|
209
|
/* |
|
210
|
** Destroy a previously allocated Matcher object. |
|
211
|
*/ |
|
212
|
void match_free(Matcher *p){ |
|
213
|
int i; |
|
214
|
if( p==0 ) return; |
|
215
|
if( p->style==MS_REGEXP ){ |
|
216
|
for(i=0; i<p->nPattern; i++) re_free(p->aRe[i]); |
|
217
|
fossil_free(p->aRe); |
|
218
|
}else{ |
|
219
|
for(i=0; i<p->nPattern; i++) fossil_free(p->azPattern[i]); |
|
220
|
fossil_free(p->azPattern); |
|
221
|
} |
|
222
|
memset(p, 0, sizeof(*p)); |
|
223
|
fossil_free(p); |
|
224
|
} |
|
225
|
|
|
226
|
|
|
227
|
|
|
228
|
/* |
|
229
|
** Quote a tag string by surrounding it with double quotes and preceding |
|
230
|
** internal double quotes and backslashes with backslashes. |
|
231
|
*/ |
|
232
|
static const char *tagQuote( |
|
233
|
int len, /* Maximum length of zTag, or negative for unlimited */ |
|
234
|
const char *zTag /* Tag string */ |
|
235
|
){ |
|
236
|
Blob blob = BLOB_INITIALIZER; |
|
237
|
int i, j; |
|
238
|
blob_zero(&blob); |
|
239
|
blob_append(&blob, "\"", 1); |
|
240
|
for( i=j=0; zTag[j] && (len<0 || j<len); ++j ){ |
|
241
|
if( zTag[j]=='\"' || zTag[j]=='\\' ){ |
|
242
|
if( j>i ){ |
|
243
|
blob_append(&blob, zTag+i, j-i); |
|
244
|
} |
|
245
|
blob_append(&blob, "\\", 1); |
|
246
|
i = j; |
|
247
|
} |
|
248
|
} |
|
249
|
if( j>i ){ |
|
250
|
blob_append(&blob, zTag+i, j-i); |
|
251
|
} |
|
252
|
blob_append(&blob, "\"", 1); |
|
253
|
return blob_str(&blob); |
|
254
|
} |
|
255
|
|
|
256
|
/* |
|
257
|
** Construct the SQL expression that goes into the WHERE clause of a join |
|
258
|
** that involves the TAG table and that selects a particular tag out of |
|
259
|
** that table. |
|
260
|
** |
|
261
|
** This function is adapted from glob_expr() to support the MS_EXACT, MS_GLOB, |
|
262
|
** MS_LIKE, MS_REGEXP, and MS_BRLIST match styles. |
|
263
|
** |
|
264
|
** For MS_EXACT, the returned expression |
|
265
|
** checks for integer match against the tag ID which is looked up directly by |
|
266
|
** this function. For the other modes, the returned SQL expression performs |
|
267
|
** string comparisons against the tag names, so it is necessary to join against |
|
268
|
** the tag table to access the "tagname" column. |
|
269
|
** |
|
270
|
** Each pattern is adjusted to start with "sym-" and be anchored at end. |
|
271
|
** |
|
272
|
** In MS_REGEXP mode, backslash can be used to protect delimiter characters. |
|
273
|
** The backslashes are not removed from the regular expression. |
|
274
|
** |
|
275
|
** In addition to assembling and returning an SQL expression, this function |
|
276
|
** makes an English-language description of the patterns being matched, suitable |
|
277
|
** for display in the web interface. |
|
278
|
** |
|
279
|
** If any errors arise during processing, *zError is set to an error message. |
|
280
|
** Otherwise it is set to NULL. |
|
281
|
*/ |
|
282
|
const char *match_tag_sqlexpr( |
|
283
|
MatchStyle matchStyle, /* Match style code */ |
|
284
|
const char *zTag, /* Tag name, match pattern, or pattern list */ |
|
285
|
const char **zDesc, /* Output expression description string */ |
|
286
|
const char **zError /* Output error string */ |
|
287
|
){ |
|
288
|
Blob expr = BLOB_INITIALIZER; /* SQL expression string assembly buffer */ |
|
289
|
Blob desc = BLOB_INITIALIZER; /* English description of match patterns */ |
|
290
|
Blob err = BLOB_INITIALIZER; /* Error text assembly buffer */ |
|
291
|
const char *zStart; /* Text at start of expression */ |
|
292
|
const char *zDelimiter; /* Text between expression terms */ |
|
293
|
const char *zEnd; /* Text at end of expression */ |
|
294
|
const char *zPrefix; /* Text before each match pattern */ |
|
295
|
const char *zSuffix; /* Text after each match pattern */ |
|
296
|
const char *zIntro; /* Text introducing pattern description */ |
|
297
|
const char *zPattern = 0; /* Previous quoted pattern */ |
|
298
|
const char *zFail = 0; /* Current failure message or NULL if okay */ |
|
299
|
const char *zOr = " or "; /* Text before final quoted pattern */ |
|
300
|
char cDel; /* Input delimiter character */ |
|
301
|
int i; /* Input match pattern length counter */ |
|
302
|
|
|
303
|
/* Optimize exact matches by looking up the ID in advance to create a simple |
|
304
|
* numeric comparison. Bypass the remainder of this function. */ |
|
305
|
if( matchStyle==MS_EXACT ){ |
|
306
|
*zDesc = tagQuote(-1, zTag); |
|
307
|
return mprintf("(tagid=%d)", db_int(-1, |
|
308
|
"SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag)); |
|
309
|
} |
|
310
|
|
|
311
|
/* Decide pattern prefix and suffix strings according to match style. */ |
|
312
|
if( matchStyle==MS_GLOB ){ |
|
313
|
zStart = "("; |
|
314
|
zDelimiter = " OR "; |
|
315
|
zEnd = ")"; |
|
316
|
zPrefix = "tagname GLOB 'sym-"; |
|
317
|
zSuffix = "'"; |
|
318
|
zIntro = "glob pattern "; |
|
319
|
}else if( matchStyle==MS_LIKE ){ |
|
320
|
zStart = "("; |
|
321
|
zDelimiter = " OR "; |
|
322
|
zEnd = ")"; |
|
323
|
zPrefix = "tagname LIKE 'sym-"; |
|
324
|
zSuffix = "'"; |
|
325
|
zIntro = "SQL LIKE pattern "; |
|
326
|
}else if( matchStyle==MS_REGEXP ){ |
|
327
|
zStart = "(tagname REGEXP '^sym-("; |
|
328
|
zDelimiter = "|"; |
|
329
|
zEnd = ")$')"; |
|
330
|
zPrefix = ""; |
|
331
|
zSuffix = ""; |
|
332
|
zIntro = "regular expression "; |
|
333
|
}else/* if( matchStyle==MS_BRLIST )*/{ |
|
334
|
zStart = "tagname IN ('sym-"; |
|
335
|
zDelimiter = "','sym-"; |
|
336
|
zEnd = "')"; |
|
337
|
zPrefix = ""; |
|
338
|
zSuffix = ""; |
|
339
|
zIntro = ""; |
|
340
|
} |
|
341
|
|
|
342
|
/* Convert the list of matches into an SQL expression and text description. */ |
|
343
|
blob_zero(&expr); |
|
344
|
blob_zero(&desc); |
|
345
|
blob_zero(&err); |
|
346
|
while( 1 ){ |
|
347
|
/* Skip leading delimiters. */ |
|
348
|
for( ; fossil_isspace(*zTag) || *zTag==','; ++zTag ); |
|
349
|
|
|
350
|
/* Next non-delimiter character determines quoting. */ |
|
351
|
if( !*zTag ){ |
|
352
|
/* Terminate loop at end of string. */ |
|
353
|
break; |
|
354
|
}else if( *zTag=='\'' || *zTag=='"' ){ |
|
355
|
/* If word is quoted, prepare to stop at end quote. */ |
|
356
|
cDel = *zTag; |
|
357
|
++zTag; |
|
358
|
}else{ |
|
359
|
/* If word is not quoted, prepare to stop at delimiter. */ |
|
360
|
cDel = ','; |
|
361
|
} |
|
362
|
|
|
363
|
/* Find the next delimiter character or end of string. */ |
|
364
|
for( i=0; zTag[i] && zTag[i]!=cDel; ++i ){ |
|
365
|
/* If delimiter is comma, also recognize spaces as delimiters. */ |
|
366
|
if( cDel==',' && fossil_isspace(zTag[i]) ){ |
|
367
|
break; |
|
368
|
} |
|
369
|
|
|
370
|
/* In regexp mode, ignore delimiters following backslashes. */ |
|
371
|
if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){ |
|
372
|
++i; |
|
373
|
} |
|
374
|
} |
|
375
|
|
|
376
|
/* Check for regular expression syntax errors. */ |
|
377
|
if( matchStyle==MS_REGEXP ){ |
|
378
|
ReCompiled *regexp; |
|
379
|
char *zTagDup = fossil_strndup(zTag, i); |
|
380
|
zFail = fossil_re_compile(®exp, zTagDup, 0); |
|
381
|
re_free(regexp); |
|
382
|
fossil_free(zTagDup); |
|
383
|
} |
|
384
|
|
|
385
|
/* Process success and error results. */ |
|
386
|
if( !zFail ){ |
|
387
|
/* Incorporate the match word into the output expression. %q is used to |
|
388
|
* protect against SQL injection attacks by replacing ' with ''. */ |
|
389
|
blob_appendf(&expr, "%s%s%#q%s", blob_size(&expr) ? zDelimiter : zStart, |
|
390
|
zPrefix, i, zTag, zSuffix); |
|
391
|
|
|
392
|
/* Build up the description string. */ |
|
393
|
if( !blob_size(&desc) ){ |
|
394
|
/* First tag: start with intro followed by first quoted tag. */ |
|
395
|
blob_append(&desc, zIntro, -1); |
|
396
|
blob_append(&desc, tagQuote(i, zTag), -1); |
|
397
|
}else{ |
|
398
|
if( zPattern ){ |
|
399
|
/* Third and subsequent tags: append comma then previous tag. */ |
|
400
|
blob_append(&desc, ", ", 2); |
|
401
|
blob_append(&desc, zPattern, -1); |
|
402
|
zOr = ", or "; |
|
403
|
} |
|
404
|
|
|
405
|
/* Second and subsequent tags: store quoted tag for next iteration. */ |
|
406
|
zPattern = tagQuote(i, zTag); |
|
407
|
} |
|
408
|
}else{ |
|
409
|
/* On error, skip the match word and build up the error message buffer. */ |
|
410
|
if( !blob_size(&err) ){ |
|
411
|
blob_append(&err, "Error: ", 7); |
|
412
|
}else{ |
|
413
|
blob_append(&err, ", ", 2); |
|
414
|
} |
|
415
|
blob_appendf(&err, "(%s%s: %s)", zIntro, tagQuote(i, zTag), zFail); |
|
416
|
} |
|
417
|
|
|
418
|
/* Advance past all consumed input characters. */ |
|
419
|
zTag += i; |
|
420
|
if( cDel!=',' && *zTag==cDel ){ |
|
421
|
++zTag; |
|
422
|
} |
|
423
|
} |
|
424
|
|
|
425
|
/* Finalize and extract the pattern description. */ |
|
426
|
if( zPattern ){ |
|
427
|
blob_append(&desc, zOr, -1); |
|
428
|
blob_append(&desc, zPattern, -1); |
|
429
|
} |
|
430
|
*zDesc = blob_str(&desc); |
|
431
|
|
|
432
|
/* Finalize and extract the error text. */ |
|
433
|
*zError = blob_size(&err) ? blob_str(&err) : 0; |
|
434
|
|
|
435
|
/* Finalize and extract the SQL expression. */ |
|
436
|
if( blob_size(&expr) ){ |
|
437
|
blob_append(&expr, zEnd, -1); |
|
438
|
return blob_str(&expr); |
|
439
|
} |
|
440
|
|
|
441
|
/* If execution reaches this point, the pattern was empty. Return NULL. */ |
|
442
|
return 0; |
|
443
|
} |
|
444
|
|