Fossil SCM
Optimize REGEXP matching by folding all regular expressions into one
Commit
b4800dc53d075bb81eafd392732af1e8c72ce040
Parent
c1afe56ffa50bf1…
1 file changed: 31 lines added (+31), 24 lines removed (-24)
| --- src/timeline.c | ||
| +++ src/timeline.c | ||
| @@ -1248,35 +1248,41 @@ | ||
| 1248 | 1248 | MatchStyle matchStyle, /* Match style code */ |
| 1249 | 1249 | const char *zTag, /* Tag name, match pattern, or list of patterns */ |
| 1250 | 1250 | int *pCount /* Pointer to match pattern count variable */ |
| 1251 | 1251 | ){ |
| 1252 | 1252 | Blob blob = BLOB_INITIALIZER; |
| 1253 | - const char *zSep = "(", *zPre, *zSuf; | |
| 1253 | + const char *zStart, *zDelimiter, *zEnd, *zPrefix, *zSuffix; | |
| 1254 | 1254 | char cDel; |
| 1255 | - int i, dummy; | |
| 1256 | - | |
| 1257 | - /* Protect against NULL count pointer. */ | |
| 1258 | - if( !pCount ){ | |
| 1259 | - pCount = &dummy; | |
| 1260 | - } | |
| 1261 | - | |
| 1262 | - /* Decide pattern prefix and suffix strings according to match style. */ | |
| 1263 | - if( matchStyle==MS_EXACT ){ | |
| 1264 | - /* Optimize exact matches by looking up the numeric ID in advance. Bypass | |
| 1265 | - * the remainder of this function. */ | |
| 1255 | + int i; | |
| 1256 | + | |
| 1257 | + /* Optimize exact matches by looking up the ID in advance to create a simple | |
| 1258 | + * numeric comparison. Bypass the remainder of this function. */ | |
| 1259 | + if( matchStyle==MS_EXACT ){ | |
| 1266 | 1260 | *pCount = 1; |
| 1267 | 1261 | return mprintf("(tagid=%d)", db_int(-1, |
| 1268 | 1262 | "SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag)); |
| 1269 | - }else if( matchStyle==MS_LIKE ){ | |
| 1270 | - zPre = "LIKE 'sym-"; | |
| 1271 | - zSuf = "'"; | |
| 1263 | + } | |
| 1264 | + | |
| 1265 | + /* Decide pattern prefix and suffix strings according to match style. */ | |
| 1266 | + if( matchStyle==MS_LIKE ){ | |
| 1267 | + zStart = "("; | |
| 1268 | + zDelimiter = " OR "; | |
| 1269 | + zEnd = ")"; | |
| 1270 | + zPrefix = "tagname LIKE 'sym-"; | |
| 1271 | + zSuffix = "'"; | |
| 1272 | 1272 | }else if( matchStyle==MS_GLOB ){ |
| 1273 | - zPre = "GLOB 'sym-"; | |
| 1274 | - zSuf = "'"; | |
| 1273 | + zStart = "("; | |
| 1274 | + zDelimiter = " OR "; | |
| 1275 | + zEnd = ")"; | |
| 1276 | + zPrefix = "tagname GLOB 'sym-"; | |
| 1277 | + zSuffix = "'"; | |
| 1275 | 1278 | }else/* if( matchStyle==MS_REGEXP )*/{ |
| 1276 | - zPre = "REGEXP '^sym-"; | |
| 1277 | - zSuf = "$'"; | |
| 1279 | + zStart = "(tagname REGEXP '^sym-("; | |
| 1280 | + zDelimiter = "|"; | |
| 1281 | + zEnd = ")$')"; | |
| 1282 | + zPrefix = ""; | |
| 1283 | + zSuffix = ""; | |
| 1278 | 1284 | } |
| 1279 | 1285 | |
| 1280 | 1286 | /* Convert the list of matches into an SQL expression. */ |
| 1281 | 1287 | *pCount = 0; |
| 1282 | 1288 | blob_zero(&blob); |
| @@ -1308,27 +1314,28 @@ | ||
| 1308 | 1314 | if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){ |
| 1309 | 1315 | ++i; |
| 1310 | 1316 | } |
| 1311 | 1317 | } |
| 1312 | 1318 | |
| 1313 | - /* Incorporate the match word into the final expression. */ | |
| 1314 | - blob_appendf(&blob, "%stagname %s%#q%s", zSep, zPre, i, zTag, zSuf); | |
| 1319 | + /* Incorporate the match word into the output expression. The %q format is | |
| 1320 | + * used to protect against SQL injection attacks by replacing ' with ''. */ | |
| 1321 | + blob_appendf(&blob, "%s%s%#q%s", *pCount ? zDelimiter : zStart, | |
| 1322 | + zPrefix, i, zTag, zSuffix); | |
| 1315 | 1323 | |
| 1316 | 1324 | /* Keep track of the number of match expressions. */ |
| 1317 | 1325 | ++*pCount; |
| 1318 | 1326 | |
| 1319 | - /* Prepare for the next match word. */ | |
| 1327 | + /* Advance past all consumed input characters. */ | |
| 1320 | 1328 | zTag += i; |
| 1321 | 1329 | if( cDel!=',' && *zTag==cDel ){ |
| 1322 | 1330 | ++zTag; |
| 1323 | 1331 | } |
| 1324 | - zSep = " OR "; | |
| 1325 | 1332 | } |
| 1326 | 1333 | |
| 1327 | 1334 | /* Finalize and extract the SQL expression. */ |
| 1328 | 1335 | if( *pCount ){ |
| 1329 | - blob_append(&blob, ")", 1); | |
| 1336 | + blob_append(&blob, zEnd, -1); | |
| 1330 | 1337 | return blob_str(&blob); |
| 1331 | 1338 | } |
| 1332 | 1339 | |
| 1333 | 1340 | /* If execution reaches this point, the pattern was empty. Return NULL. */ |
| 1334 | 1341 | return 0; |
| 1335 | 1342 |
| --- src/timeline.c | |
| +++ src/timeline.c | |
| @@ -1248,35 +1248,41 @@ | |
| 1248 | MatchStyle matchStyle, /* Match style code */ |
| 1249 | const char *zTag, /* Tag name, match pattern, or list of patterns */ |
| 1250 | int *pCount /* Pointer to match pattern count variable */ |
| 1251 | ){ |
| 1252 | Blob blob = BLOB_INITIALIZER; |
| 1253 | const char *zSep = "(", *zPre, *zSuf; |
| 1254 | char cDel; |
| 1255 | int i, dummy; |
| 1256 | |
| 1257 | /* Protect against NULL count pointer. */ |
| 1258 | if( !pCount ){ |
| 1259 | pCount = &dummy; |
| 1260 | } |
| 1261 | |
| 1262 | /* Decide pattern prefix and suffix strings according to match style. */ |
| 1263 | if( matchStyle==MS_EXACT ){ |
| 1264 | /* Optimize exact matches by looking up the numeric ID in advance. Bypass |
| 1265 | * the remainder of this function. */ |
| 1266 | *pCount = 1; |
| 1267 | return mprintf("(tagid=%d)", db_int(-1, |
| 1268 | "SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag)); |
| 1269 | }else if( matchStyle==MS_LIKE ){ |
| 1270 | zPre = "LIKE 'sym-"; |
| 1271 | zSuf = "'"; |
| 1272 | }else if( matchStyle==MS_GLOB ){ |
| 1273 | zPre = "GLOB 'sym-"; |
| 1274 | zSuf = "'"; |
| 1275 | }else/* if( matchStyle==MS_REGEXP )*/{ |
| 1276 | zPre = "REGEXP '^sym-"; |
| 1277 | zSuf = "$'"; |
| 1278 | } |
| 1279 | |
| 1280 | /* Convert the list of matches into an SQL expression. */ |
| 1281 | *pCount = 0; |
| 1282 | blob_zero(&blob); |
| @@ -1308,27 +1314,28 @@ | |
| 1308 | if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){ |
| 1309 | ++i; |
| 1310 | } |
| 1311 | } |
| 1312 | |
| 1313 | /* Incorporate the match word into the final expression. */ |
| 1314 | blob_appendf(&blob, "%stagname %s%#q%s", zSep, zPre, i, zTag, zSuf); |
| 1315 | |
| 1316 | /* Keep track of the number of match expressions. */ |
| 1317 | ++*pCount; |
| 1318 | |
| 1319 | /* Prepare for the next match word. */ |
| 1320 | zTag += i; |
| 1321 | if( cDel!=',' && *zTag==cDel ){ |
| 1322 | ++zTag; |
| 1323 | } |
| 1324 | zSep = " OR "; |
| 1325 | } |
| 1326 | |
| 1327 | /* Finalize and extract the SQL expression. */ |
| 1328 | if( *pCount ){ |
| 1329 | blob_append(&blob, ")", 1); |
| 1330 | return blob_str(&blob); |
| 1331 | } |
| 1332 | |
| 1333 | /* If execution reaches this point, the pattern was empty. Return NULL. */ |
| 1334 | return 0; |
| 1335 |
| --- src/timeline.c | |
| +++ src/timeline.c | |
| @@ -1248,35 +1248,41 @@ | |
| 1248 | MatchStyle matchStyle, /* Match style code */ |
| 1249 | const char *zTag, /* Tag name, match pattern, or list of patterns */ |
| 1250 | int *pCount /* Pointer to match pattern count variable */ |
| 1251 | ){ |
| 1252 | Blob blob = BLOB_INITIALIZER; |
| 1253 | const char *zStart, *zDelimiter, *zEnd, *zPrefix, *zSuffix; |
| 1254 | char cDel; |
| 1255 | int i; |
| 1256 | |
| 1257 | /* Optimize exact matches by looking up the ID in advance to create a simple |
| 1258 | * numeric comparison. Bypass the remainder of this function. */ |
| 1259 | if( matchStyle==MS_EXACT ){ |
| 1260 | *pCount = 1; |
| 1261 | return mprintf("(tagid=%d)", db_int(-1, |
| 1262 | "SELECT tagid FROM tag WHERE tagname='sym-%q'", zTag)); |
| 1263 | } |
| 1264 | |
| 1265 | /* Decide pattern prefix and suffix strings according to match style. */ |
| 1266 | if( matchStyle==MS_LIKE ){ |
| 1267 | zStart = "("; |
| 1268 | zDelimiter = " OR "; |
| 1269 | zEnd = ")"; |
| 1270 | zPrefix = "tagname LIKE 'sym-"; |
| 1271 | zSuffix = "'"; |
| 1272 | }else if( matchStyle==MS_GLOB ){ |
| 1273 | zStart = "("; |
| 1274 | zDelimiter = " OR "; |
| 1275 | zEnd = ")"; |
| 1276 | zPrefix = "tagname GLOB 'sym-"; |
| 1277 | zSuffix = "'"; |
| 1278 | }else/* if( matchStyle==MS_REGEXP )*/{ |
| 1279 | zStart = "(tagname REGEXP '^sym-("; |
| 1280 | zDelimiter = "|"; |
| 1281 | zEnd = ")$')"; |
| 1282 | zPrefix = ""; |
| 1283 | zSuffix = ""; |
| 1284 | } |
| 1285 | |
| 1286 | /* Convert the list of matches into an SQL expression. */ |
| 1287 | *pCount = 0; |
| 1288 | blob_zero(&blob); |
| @@ -1308,27 +1314,28 @@ | |
| 1314 | if( matchStyle==MS_REGEXP && zTag[i]=='\\' && zTag[i+1] ){ |
| 1315 | ++i; |
| 1316 | } |
| 1317 | } |
| 1318 | |
| 1319 | /* Incorporate the match word into the output expression. The %q format is |
| 1320 | * used to protect against SQL injection attacks by replacing ' with ''. */ |
| 1321 | blob_appendf(&blob, "%s%s%#q%s", *pCount ? zDelimiter : zStart, |
| 1322 | zPrefix, i, zTag, zSuffix); |
| 1323 | |
| 1324 | /* Keep track of the number of match expressions. */ |
| 1325 | ++*pCount; |
| 1326 | |
| 1327 | /* Advance past all consumed input characters. */ |
| 1328 | zTag += i; |
| 1329 | if( cDel!=',' && *zTag==cDel ){ |
| 1330 | ++zTag; |
| 1331 | } |
| 1332 | } |
| 1333 | |
| 1334 | /* Finalize and extract the SQL expression. */ |
| 1335 | if( *pCount ){ |
| 1336 | blob_append(&blob, zEnd, -1); |
| 1337 | return blob_str(&blob); |
| 1338 | } |
| 1339 | |
| 1340 | /* If execution reaches this point, the pattern was empty. Return NULL. */ |
| 1341 | return 0; |
| 1342 |