Fossil SCM

fossil-scm / src / match.c
Blame History Raw 444 lines
1
/*
2
** Copyright (c) 2007 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
** [email protected]
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file contains code to implement string comparisons using a
19
** variety of algorithm. The comparison algorithm can be any of:
20
**
21
** MS_EXACT The string must exactly match the pattern.
22
**
23
** MS_BRLIST The pattern is a space- and/or comma-separated
24
** list of strings, any one of which may match
25
** the input string.
26
**
27
** MS_GLOB Like BRLIST, except each component of the pattern
28
** is a GLOB expression.
29
**
30
** MS_LIKE Like BRLIST, except each component of the pattern
31
** is an SQL LIKE expression.
32
**
33
** MS_REGEXP Like BRLIST, except each component of the pattern
34
** is a regular expression.
35
**
36
*/
37
#include "config.h"
38
#include <string.h>
39
#include "match.h"
40
41
#if INTERFACE
42
/*
43
** Types of comparisons that we are able to perform:
44
*/
45
typedef enum {
46
MS_EXACT=1, /* Exact string comparison */
47
MS_GLOB=2, /* Matches against a list of GLOB patterns. */
48
MS_LIKE=3, /* Matches against a list of LIKE patterns. */
49
MS_REGEXP=4, /* Matches against a list of regular expressions. */
50
MS_BRLIST=5, /* Matches any element of a list */
51
} MatchStyle;
52
53
/*
54
** The following object represents a precompiled pattern to use for
55
** string matching.
56
**
57
** * Create an instance of this object using match_create().
58
** * Do comparisons using match_text().
59
** * Destroy using match_free() when you are done.
60
**
61
*/
62
struct Matcher {
63
MatchStyle style; /* Which algorithm to use */
64
int nPattern; /* How many patterns are their */
65
char **azPattern; /* List of patterns */
66
ReCompiled **aRe; /* List of compiled regular expressions */
67
};
68
69
#endif /*INTERFACE*/
70
71
/*
72
** Translate a "match style" text name into the MS_* enum value.
73
** Return eDflt if no match is found.
74
*/
75
MatchStyle match_style(const char *zStyle, MatchStyle eDflt){
76
if( zStyle==0 ) return eDflt;
77
if( fossil_stricmp(zStyle, "brlist")==0 ) return MS_BRLIST;
78
if( fossil_stricmp(zStyle, "list")==0 ) return MS_BRLIST;
79
if( fossil_stricmp(zStyle, "regexp")==0 ) return MS_REGEXP;
80
if( fossil_stricmp(zStyle, "re")==0 ) return MS_REGEXP;
81
if( fossil_stricmp(zStyle, "glob")==0 ) return MS_GLOB;
82
if( fossil_stricmp(zStyle, "like")==0 ) return MS_LIKE;
83
if( fossil_stricmp(zStyle, "exact")==0 ) return MS_EXACT;
84
return eDflt;
85
}
86
87
88
/*
89
** Create a new Matcher object using the pattern provided.
90
*/
91
Matcher *match_create(MatchStyle style, const char *zPat){
92
char cDel; /* Delimiter character */
93
int i; /* Loop counter */
94
Matcher *p; /* The new Matcher to be constructed */
95
char *zOne; /* One element of the pattern */
96
97
if( zPat==0 ) return 0;
98
p = fossil_malloc( sizeof(*p) );
99
memset(p, 0, sizeof(*p));
100
p->style = style;
101
102
if( style==MS_EXACT ){
103
p->nPattern = 1;
104
p->azPattern = fossil_malloc( sizeof(p->azPattern[0]) );
105
p->azPattern[0] = fossil_strdup(zPat);
106
return p;
107
}
108
109
while( 1 ){
110
/* Skip leading delimiters. */
111
for( ; fossil_isspace(*zPat) || *zPat==','; ++zPat );
112
113
/* Next non-delimiter character determines quoting. */
114
if( zPat[0]==0 ){
115
/* Terminate loop at end of string. */
116
break;
117
}else if( zPat[0]=='\'' || zPat[0]=='"' ){
118
/* If word is quoted, prepare to stop at end quote. */
119
cDel = zPat[0];
120
++zPat;
121
}else{
122
/* If word is not quoted, prepare to stop at delimiter. */
123
cDel = ',';
124
}
125
126
/* Find the next delimiter character or end of string. */
127
for( i=0; zPat[i] && zPat[i]!=cDel; ++i ){
128
/* If delimiter is comma, also recognize spaces as delimiters. */
129
if( cDel==',' && fossil_isspace(zPat[i]) ){
130
break;
131
}
132
133
/* In regexp mode, ignore delimiters following backslashes. */
134
if( style==MS_REGEXP && zPat[i]=='\\' && zPat[i+1] ){
135
++i;
136
}
137
}
138
139
/* zOne is a zero-terminated copy of the pattern, without delimiters */
140
zOne = fossil_strndup(zPat, i);
141
zPat += i;
142
if( zPat[0] ) zPat++;
143
144
/* Check for regular expression syntax errors. */
145
if( style==MS_REGEXP ){
146
ReCompiled *regexp;
147
const char *zFail = fossil_re_compile(&regexp, zOne, 0);
148
if( zFail ){
149
re_free(regexp);
150
continue;
151
}
152
p->nPattern++;
153
p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)*p->nPattern);
154
p->aRe[p->nPattern-1] = regexp;
155
fossil_free(zOne);
156
}else{
157
p->nPattern++;
158
p->azPattern = fossil_realloc(p->azPattern, sizeof(char*)*p->nPattern);
159
p->azPattern[p->nPattern-1] = zOne;
160
}
161
}
162
return p;
163
}
164
165
/*
166
** Return non-zero (true) if the input string matches the pattern
167
** described by the matcher.
168
**
169
** The return value is really the 1-based index of the particular
170
** pattern that matched.
171
*/
172
int match_text(Matcher *p, const char *zText){
173
int i;
174
if( p==0 ){
175
return zText==0;
176
}
177
switch( p->style ){
178
case MS_BRLIST:
179
case MS_EXACT: {
180
for(i=0; i<p->nPattern; i++){
181
if( strcmp(p->azPattern[i], zText)==0 ) return i+1;
182
}
183
break;
184
}
185
case MS_GLOB: {
186
for(i=0; i<p->nPattern; i++){
187
if( sqlite3_strglob(p->azPattern[i], zText)==0 ) return i+1;
188
}
189
break;
190
}
191
case MS_LIKE: {
192
for(i=0; i<p->nPattern; i++){
193
if( sqlite3_strlike(p->azPattern[i], zText, 0)==0 ) return i+1;
194
}
195
break;
196
}
197
case MS_REGEXP: {
198
int nText = (int)strlen(zText);
199
for(i=0; i<p->nPattern; i++){
200
if( re_match(p->aRe[i], (const u8*)zText, nText) ) return i+1;
201
}
202
break;
203
}
204
}
205
return 0;
206
}
207
208
209
/*
210
** Destroy a previously allocated Matcher object.
211
*/
212
void match_free(Matcher *p){
213
int i;
214
if( p==0 ) return;
215
if( p->style==MS_REGEXP ){
216
for(i=0; i<p->nPattern; i++) re_free(p->aRe[i]);
217
fossil_free(p->aRe);
218
}else{
219
for(i=0; i<p->nPattern; i++) fossil_free(p->azPattern[i]);
220
fossil_free(p->azPattern);
221
}
222
memset(p, 0, sizeof(*p));
223
fossil_free(p);
224
}
225
226
227
228
/*
229
** Quote a tag string by surrounding it with double quotes and preceding
230
** internal double quotes and backslashes with backslashes.
231
*/
232
static const char *tagQuote(
233
int len, /* Maximum length of zTag, or negative for unlimited */
234
const char *zTag /* Tag string */
235
){
236
Blob blob = BLOB_INITIALIZER;
237
int i, j;
238
blob_zero(&blob);
239
blob_append(&blob, "\"", 1);
240
for( i=j=0; zTag[j] && (len<0 || j<len); ++j ){
241
if( zTag[j]=='\"' || zTag[j]=='\\' ){
242
if( j>i ){
243
blob_append(&blob, zTag+i, j-i);
244
}
245
blob_append(&blob, "\\", 1);
246
i = j;
247
}
248
}
249
if( j>i ){
250
blob_append(&blob, zTag+i, j-i);
251
}
252
blob_append(&blob, "\"", 1);
253
return blob_str(&blob);
254
}
255
256
/*
257
** Construct the SQL expression that goes into the WHERE clause of a join
258
** that involves the TAG table and that selects a particular tag out of
259
** that table.
260
**
261
** This function is adapted from glob_expr() to support the MS_EXACT, MS_GLOB,
262
** MS_LIKE, MS_REGEXP, and MS_BRLIST match styles.
263
**
264
** For MS_EXACT, the returned expression
265
** checks for integer match against the tag ID which is looked up directly by
266
** this function. For the other modes, the returned SQL expression performs
267
** string comparisons against the tag names, so it is necessary to join against
268
** the tag table to access the "tagname" column.
269
**
270
** Each pattern is adjusted to start with "sym-" and be anchored at end.
271
**
272
** In MS_REGEXP mode, backslash can be used to protect delimiter characters.
273
** The backslashes are not removed from the regular expression.
274
**
275
** In addition to assembling and returning an SQL expression, this function
276
** makes an English-language description of the patterns being matched, suitable
277
** for display in the web interface.
278
**
279
** If any errors arise during processing, *zError is set to an error message.
280
** Otherwise it is set to NULL.
281
*/
282
const char *match_tag_sqlexpr(
283
MatchStyle matchStyle, /* Match style code */
284
const char *zTag, /* Tag name, match pattern, or pattern list */
285
const char **zDesc, /* Output expression description string */
286
const char **zError /* Output error string */
287
){
288
Blob expr = BLOB_INITIALIZER; /* SQL expression string assembly buffer */
289
Blob desc = BLOB_INITIALIZER; /* English description of match patterns */
290
Blob err = BLOB_INITIALIZER; /* Error text assembly buffer */
291
const char *zStart; /* Text at start of expression */
292
const char *zDelimiter; /* Text between expression terms */
293
const char *zEnd; /* Text at end of expression */
294
const char *zPrefix; /* Text before each match pattern */
295
const char *zSuffix; /* Text after each match pattern */
296
const char *zIntro; /* Text introducing pattern description */
297
const char *zPattern = 0; /* Previous quoted pattern */
298
const char *zFail = 0; /* Current failure message or NULL if okay */
299
const char *zOr = " or "; /* Text before final quoted pattern */
300
char cDel; /* Input delimiter character */
301
int i; /* Input match pattern length counter */
302
303
/* Optimize exact matches by looking up the ID in advance to create a simple
304
* numeric comparison. Bypass the remainder of this function. */
305
if( matchStyle==MS_EXACT ){
306
*zDesc = tagQuote(-1, zTag);
307
return mprintf("(tagid=%d)", db_int(-1,
308
"SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag));
309
}
310
311
/* Decide pattern prefix and suffix strings according to match style. */
312
if( matchStyle==MS_GLOB ){
313
zStart = "(";
314
zDelimiter = " OR ";
315
zEnd = ")";
316
zPrefix = "tagname GLOB 'sym-";
317
zSuffix = "'";
318
zIntro = "glob pattern ";
319
}else if( matchStyle==MS_LIKE ){
320
zStart = "(";
321
zDelimiter = " OR ";
322
zEnd = ")";
323
zPrefix = "tagname LIKE 'sym-";
324
zSuffix = "'";
325
zIntro = "SQL LIKE pattern ";
326
}else if( matchStyle==MS_REGEXP ){
327
zStart = "(tagname REGEXP '^sym-(";
328
zDelimiter = "|";
329
zEnd = ")$')";
330
zPrefix = "";
331
zSuffix = "";
332
zIntro = "regular expression ";
333
}else/* if( matchStyle==MS_BRLIST )*/{
334
zStart = "tagname IN ('sym-";
335
zDelimiter = "','sym-";
336
zEnd = "')";
337
zPrefix = "";
338
zSuffix = "";
339
zIntro = "";
340
}
341
342
/* Convert the list of matches into an SQL expression and text description. */
343
blob_zero(&expr);
344
blob_zero(&desc);
345
blob_zero(&err);
346
while( 1 ){
347
/* Skip leading delimiters. */
348
for( ; fossil_isspace(*zTag) || *zTag==','; ++zTag );
349
350
/* Next non-delimiter character determines quoting. */
351
if( !*zTag ){
352
/* Terminate loop at end of string. */
353
break;
354
}else if( *zTag=='\'' || *zTag=='"' ){
355
/* If word is quoted, prepare to stop at end quote. */
356
cDel = *zTag;
357
++zTag;
358
}else{
359
/* If word is not quoted, prepare to stop at delimiter. */
360
cDel = ',';
361
}
362
363
/* Find the next delimiter character or end of string. */
364
for( i=0; zTag[i] && zTag[i]!=cDel; ++i ){
365
/* If delimiter is comma, also recognize spaces as delimiters. */
366
if( cDel==',' && fossil_isspace(zTag[i]) ){
367
break;
368
}
369
370
/* In regexp mode, ignore delimiters following backslashes. */
371
if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){
372
++i;
373
}
374
}
375
376
/* Check for regular expression syntax errors. */
377
if( matchStyle==MS_REGEXP ){
378
ReCompiled *regexp;
379
char *zTagDup = fossil_strndup(zTag, i);
380
zFail = fossil_re_compile(&regexp, zTagDup, 0);
381
re_free(regexp);
382
fossil_free(zTagDup);
383
}
384
385
/* Process success and error results. */
386
if( !zFail ){
387
/* Incorporate the match word into the output expression. %q is used to
388
* protect against SQL injection attacks by replacing ' with ''. */
389
blob_appendf(&expr, "%s%s%#q%s", blob_size(&expr) ? zDelimiter : zStart,
390
zPrefix, i, zTag, zSuffix);
391
392
/* Build up the description string. */
393
if( !blob_size(&desc) ){
394
/* First tag: start with intro followed by first quoted tag. */
395
blob_append(&desc, zIntro, -1);
396
blob_append(&desc, tagQuote(i, zTag), -1);
397
}else{
398
if( zPattern ){
399
/* Third and subsequent tags: append comma then previous tag. */
400
blob_append(&desc, ", ", 2);
401
blob_append(&desc, zPattern, -1);
402
zOr = ", or ";
403
}
404
405
/* Second and subsequent tags: store quoted tag for next iteration. */
406
zPattern = tagQuote(i, zTag);
407
}
408
}else{
409
/* On error, skip the match word and build up the error message buffer. */
410
if( !blob_size(&err) ){
411
blob_append(&err, "Error: ", 7);
412
}else{
413
blob_append(&err, ", ", 2);
414
}
415
blob_appendf(&err, "(%s%s: %s)", zIntro, tagQuote(i, zTag), zFail);
416
}
417
418
/* Advance past all consumed input characters. */
419
zTag += i;
420
if( cDel!=',' && *zTag==cDel ){
421
++zTag;
422
}
423
}
424
425
/* Finalize and extract the pattern description. */
426
if( zPattern ){
427
blob_append(&desc, zOr, -1);
428
blob_append(&desc, zPattern, -1);
429
}
430
*zDesc = blob_str(&desc);
431
432
/* Finalize and extract the error text. */
433
*zError = blob_size(&err) ? blob_str(&err) : 0;
434
435
/* Finalize and extract the SQL expression. */
436
if( blob_size(&expr) ){
437
blob_append(&expr, zEnd, -1);
438
return blob_str(&expr);
439
}
440
441
/* If execution reaches this point, the pattern was empty. Return NULL. */
442
return 0;
443
}
444

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button