Fossil SCM

Optimize REGEXP matching by folding all regular expressions into one

andygoth 2016-10-23 21:50 andygoth-timeline-ms
Commit b4800dc53d075bb81eafd392732af1e8c72ce040
1 file changed +31 -24
+31 -24
--- src/timeline.c
+++ src/timeline.c
@@ -1248,35 +1248,41 @@
12481248
MatchStyle matchStyle, /* Match style code */
12491249
const char *zTag, /* Tag name, match pattern, or list of patterns */
12501250
int *pCount /* Pointer to match pattern count variable */
12511251
){
12521252
Blob blob = BLOB_INITIALIZER;
1253
- const char *zSep = "(", *zPre, *zSuf;
1253
+ const char *zStart, *zDelimiter, *zEnd, *zPrefix, *zSuffix;
12541254
char cDel;
1255
- int i, dummy;
1256
-
1257
- /* Protect against NULL count pointer. */
1258
- if( !pCount ){
1259
- pCount = &dummy;
1260
- }
1261
-
1262
- /* Decide pattern prefix and suffix strings according to match style. */
1263
- if( matchStyle==MS_EXACT ){
1264
- /* Optimize exact matches by looking up the numeric ID in advance. Bypass
1265
- * the remainder of this function. */
1255
+ int i;
1256
+
1257
+ /* Optimize exact matches by looking up the ID in advance to create a simple
1258
+ * numeric comparison. Bypass the remainder of this function. */
1259
+ if( matchStyle==MS_EXACT ){
12661260
*pCount = 1;
12671261
return mprintf("(tagid=%d)", db_int(-1,
12681262
"SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag));
1269
- }else if( matchStyle==MS_LIKE ){
1270
- zPre = "LIKE 'sym-";
1271
- zSuf = "'";
1263
+ }
1264
+
1265
+ /* Decide pattern prefix and suffix strings according to match style. */
1266
+ if( matchStyle==MS_LIKE ){
1267
+ zStart = "(";
1268
+ zDelimiter = " OR ";
1269
+ zEnd = ")";
1270
+ zPrefix = "tagname LIKE 'sym-";
1271
+ zSuffix = "'";
12721272
}else if( matchStyle==MS_GLOB ){
1273
- zPre = "GLOB 'sym-";
1274
- zSuf = "'";
1273
+ zStart = "(";
1274
+ zDelimiter = " OR ";
1275
+ zEnd = ")";
1276
+ zPrefix = "tagname GLOB 'sym-";
1277
+ zSuffix = "'";
12751278
}else/* if( matchStyle==MS_REGEXP )*/{
1276
- zPre = "REGEXP '^sym-";
1277
- zSuf = "$'";
1279
+ zStart = "(tagname REGEXP '^sym-(";
1280
+ zDelimiter = "|";
1281
+ zEnd = ")$')";
1282
+ zPrefix = "";
1283
+ zSuffix = "";
12781284
}
12791285
12801286
/* Convert the list of matches into an SQL expression. */
12811287
*pCount = 0;
12821288
blob_zero(&blob);
@@ -1308,27 +1314,28 @@
13081314
if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){
13091315
++i;
13101316
}
13111317
}
13121318
1313
- /* Incorporate the match word into the final expression. */
1314
- blob_appendf(&blob, "%stagname %s%#q%s", zSep, zPre, i, zTag, zSuf);
1319
+ /* Incorporate the match word into the output expression. The %q format is
1320
+ * used to protect against SQL injection attacks by replacing ' with ''. */
1321
+ blob_appendf(&blob, "%s%s%#q%s", *pCount ? zDelimiter : zStart,
1322
+ zPrefix, i, zTag, zSuffix);
13151323
13161324
/* Keep track of the number of match expressions. */
13171325
++*pCount;
13181326
1319
- /* Prepare for the next match word. */
1327
+ /* Advance past all consumed input characters. */
13201328
zTag += i;
13211329
if( cDel!=',' && *zTag==cDel ){
13221330
++zTag;
13231331
}
1324
- zSep = " OR ";
13251332
}
13261333
13271334
/* Finalize and extract the SQL expression. */
13281335
if( *pCount ){
1329
- blob_append(&blob, ")", 1);
1336
+ blob_append(&blob, zEnd, -1);
13301337
return blob_str(&blob);
13311338
}
13321339
13331340
/* If execution reaches this point, the pattern was empty. Return NULL. */
13341341
return 0;
13351342
--- src/timeline.c
+++ src/timeline.c
@@ -1248,35 +1248,41 @@
1248 MatchStyle matchStyle, /* Match style code */
1249 const char *zTag, /* Tag name, match pattern, or list of patterns */
1250 int *pCount /* Pointer to match pattern count variable */
1251 ){
1252 Blob blob = BLOB_INITIALIZER;
1253 const char *zSep = "(", *zPre, *zSuf;
1254 char cDel;
1255 int i, dummy;
1256
1257 /* Protect against NULL count pointer. */
1258 if( !pCount ){
1259 pCount = &dummy;
1260 }
1261
1262 /* Decide pattern prefix and suffix strings according to match style. */
1263 if( matchStyle==MS_EXACT ){
1264 /* Optimize exact matches by looking up the numeric ID in advance. Bypass
1265 * the remainder of this function. */
1266 *pCount = 1;
1267 return mprintf("(tagid=%d)", db_int(-1,
1268 "SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag));
1269 }else if( matchStyle==MS_LIKE ){
1270 zPre = "LIKE 'sym-";
1271 zSuf = "'";
 
 
 
 
 
 
1272 }else if( matchStyle==MS_GLOB ){
1273 zPre = "GLOB 'sym-";
1274 zSuf = "'";
 
 
 
1275 }else/* if( matchStyle==MS_REGEXP )*/{
1276 zPre = "REGEXP '^sym-";
1277 zSuf = "$'";
 
 
 
1278 }
1279
1280 /* Convert the list of matches into an SQL expression. */
1281 *pCount = 0;
1282 blob_zero(&blob);
@@ -1308,27 +1314,28 @@
1308 if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){
1309 ++i;
1310 }
1311 }
1312
1313 /* Incorporate the match word into the final expression. */
1314 blob_appendf(&blob, "%stagname %s%#q%s", zSep, zPre, i, zTag, zSuf);
 
 
1315
1316 /* Keep track of the number of match expressions. */
1317 ++*pCount;
1318
1319 /* Prepare for the next match word. */
1320 zTag += i;
1321 if( cDel!=',' && *zTag==cDel ){
1322 ++zTag;
1323 }
1324 zSep = " OR ";
1325 }
1326
1327 /* Finalize and extract the SQL expression. */
1328 if( *pCount ){
1329 blob_append(&blob, ")", 1);
1330 return blob_str(&blob);
1331 }
1332
1333 /* If execution reaches this point, the pattern was empty. Return NULL. */
1334 return 0;
1335
--- src/timeline.c
+++ src/timeline.c
@@ -1248,35 +1248,41 @@
1248 MatchStyle matchStyle, /* Match style code */
1249 const char *zTag, /* Tag name, match pattern, or list of patterns */
1250 int *pCount /* Pointer to match pattern count variable */
1251 ){
1252 Blob blob = BLOB_INITIALIZER;
1253 const char *zStart, *zDelimiter, *zEnd, *zPrefix, *zSuffix;
1254 char cDel;
1255 int i;
1256
1257 /* Optimize exact matches by looking up the ID in advance to create a simple
1258 * numeric comparison. Bypass the remainder of this function. */
1259 if( matchStyle==MS_EXACT ){
 
 
 
 
 
 
1260 *pCount = 1;
1261 return mprintf("(tagid=%d)", db_int(-1,
1262 "SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag));
1263 }
1264
1265 /* Decide pattern prefix and suffix strings according to match style. */
1266 if( matchStyle==MS_LIKE ){
1267 zStart = "(";
1268 zDelimiter = " OR ";
1269 zEnd = ")";
1270 zPrefix = "tagname LIKE 'sym-";
1271 zSuffix = "'";
1272 }else if( matchStyle==MS_GLOB ){
1273 zStart = "(";
1274 zDelimiter = " OR ";
1275 zEnd = ")";
1276 zPrefix = "tagname GLOB 'sym-";
1277 zSuffix = "'";
1278 }else/* if( matchStyle==MS_REGEXP )*/{
1279 zStart = "(tagname REGEXP '^sym-(";
1280 zDelimiter = "|";
1281 zEnd = ")$')";
1282 zPrefix = "";
1283 zSuffix = "";
1284 }
1285
1286 /* Convert the list of matches into an SQL expression. */
1287 *pCount = 0;
1288 blob_zero(&blob);
@@ -1308,27 +1314,28 @@
1314 if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){
1315 ++i;
1316 }
1317 }
1318
1319 /* Incorporate the match word into the output expression. The %q format is
1320 * used to protect against SQL injection attacks by replacing ' with ''. */
1321 blob_appendf(&blob, "%s%s%#q%s", *pCount ? zDelimiter : zStart,
1322 zPrefix, i, zTag, zSuffix);
1323
1324 /* Keep track of the number of match expressions. */
1325 ++*pCount;
1326
1327 /* Advance past all consumed input characters. */
1328 zTag += i;
1329 if( cDel!=',' && *zTag==cDel ){
1330 ++zTag;
1331 }
 
1332 }
1333
1334 /* Finalize and extract the SQL expression. */
1335 if( *pCount ){
1336 blob_append(&blob, zEnd, -1);
1337 return blob_str(&blob);
1338 }
1339
1340 /* If execution reaches this point, the pattern was empty. Return NULL. */
1341 return 0;
1342

Keyboard Shortcuts

Open search: /
Next entry (timeline): j
Previous entry (timeline): k
Open focused entry: Enter
Show this help: ?
Toggle theme: Top nav button