Fossil SCM
Rename re_execute() to re_match(). Other fixes to regex matcher bugs that were introduced when porting the code from SQLite.
Commit
c84051f38db377f5bd7843b9f72fb0b5ca38f5c6
Parent
a13e0a20a8752b5…
2 files changed
+1
-1
+13
-11
+1
-1
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -423,11 +423,11 @@ | ||
| 423 | 423 | ReCompiled *pRe, /* The regular expression to be matched */ |
| 424 | 424 | DLine *aDLine, /* First of N DLines to compare against */ |
| 425 | 425 | int N /* Number of DLines to check */ |
| 426 | 426 | ){ |
| 427 | 427 | while( N-- ){ |
| 428 | - if( re_execute(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ | |
| 428 | + if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ | |
| 429 | 429 | return 1; |
| 430 | 430 | } |
| 431 | 431 | aDLine++; |
| 432 | 432 | } |
| 433 | 433 | return 0; |
| 434 | 434 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -423,11 +423,11 @@ | |
| 423 | ReCompiled *pRe, /* The regular expression to be matched */ |
| 424 | DLine *aDLine, /* First of N DLines to compare against */ |
| 425 | int N /* Number of DLines to check */ |
| 426 | ){ |
| 427 | while( N-- ){ |
| 428 | if( re_execute(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ |
| 429 | return 1; |
| 430 | } |
| 431 | aDLine++; |
| 432 | } |
| 433 | return 0; |
| 434 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -423,11 +423,11 @@ | |
| 423 | ReCompiled *pRe, /* The regular expression to be matched */ |
| 424 | DLine *aDLine, /* First of N DLines to compare against */ |
| 425 | int N /* Number of DLines to check */ |
| 426 | ){ |
| 427 | while( N-- ){ |
| 428 | if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ |
| 429 | return 1; |
| 430 | } |
| 431 | aDLine++; |
| 432 | } |
| 433 | return 0; |
| 434 |
+13
-11
| --- src/regexp.c | ||
| +++ src/regexp.c | ||
| @@ -178,11 +178,11 @@ | ||
| 178 | 178 | } |
| 179 | 179 | |
| 180 | 180 | /* Run a compiled regular expression on the zero-terminated input |
| 181 | 181 | ** string zIn[]. Return true on a match and false if there is no match. |
| 182 | 182 | */ |
| 183 | -int re_execute(ReCompiled *pRe, const unsigned char *zIn, int nIn){ | |
| 183 | +int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ | |
| 184 | 184 | ReStateSet aStateSet[2], *pThis, *pNext; |
| 185 | 185 | ReStateNumber aSpace[100]; |
| 186 | 186 | ReStateNumber *pToFree; |
| 187 | 187 | unsigned int i = 0; |
| 188 | 188 | unsigned int iSwap = 0; |
| @@ -194,16 +194,16 @@ | ||
| 194 | 194 | in.z = zIn; |
| 195 | 195 | in.i = 0; |
| 196 | 196 | in.mx = nIn>=0 ? nIn : strlen((char const*)zIn); |
| 197 | 197 | if( pRe->nInit ){ |
| 198 | 198 | unsigned char x = pRe->zInit[0]; |
| 199 | - while( in.i+pRe->nInit<in.mx | |
| 199 | + while( in.i+pRe->nInit<=in.mx | |
| 200 | 200 | && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0) |
| 201 | 201 | ){ |
| 202 | 202 | in.i++; |
| 203 | 203 | } |
| 204 | - if( in.i+pRe->nInit>=in.mx ) return 0; | |
| 204 | + if( in.i+pRe->nInit>in.mx ) return 0; | |
| 205 | 205 | } |
| 206 | 206 | if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ |
| 207 | 207 | pToFree = 0; |
| 208 | 208 | aStateSet[0].aState = aSpace; |
| 209 | 209 | }else{ |
| @@ -275,11 +275,11 @@ | ||
| 275 | 275 | re_add_state(pThis, x+pRe->aArg[x]); |
| 276 | 276 | break; |
| 277 | 277 | } |
| 278 | 278 | case RE_OP_ACCEPT: { |
| 279 | 279 | rc = 1; |
| 280 | - goto re_execute_end; | |
| 280 | + goto re_match_end; | |
| 281 | 281 | } |
| 282 | 282 | case RE_OP_CC_INC: |
| 283 | 283 | case RE_OP_CC_EXC: { |
| 284 | 284 | int j = 1; |
| 285 | 285 | int n = pRe->aArg[x]; |
| @@ -307,11 +307,11 @@ | ||
| 307 | 307 | } |
| 308 | 308 | } |
| 309 | 309 | for(i=0; i<pNext->nState; i++){ |
| 310 | 310 | if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } |
| 311 | 311 | } |
| 312 | -re_execute_end: | |
| 312 | +re_match_end: | |
| 313 | 313 | fossil_free(pToFree); |
| 314 | 314 | return rc; |
| 315 | 315 | } |
| 316 | 316 | |
| 317 | 317 | /* Resize the opcode and argument arrays for an RE under construction. |
| @@ -386,11 +386,11 @@ | ||
| 386 | 386 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 387 | 387 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 388 | 388 | int i, v = 0; |
| 389 | 389 | char c; |
| 390 | 390 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 391 | - c = p->sIn.z[0]; | |
| 391 | + c = p->sIn.z[p->sIn.i]; | |
| 392 | 392 | if( c=='u' && p->sIn.i+5<p->sIn.mx ){ |
| 393 | 393 | v = 0; |
| 394 | 394 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 395 | 395 | if( re_hex(zIn[1],&v) |
| 396 | 396 | && re_hex(zIn[2],&v) |
| @@ -594,16 +594,17 @@ | ||
| 594 | 594 | */ |
| 595 | 595 | void re_free(ReCompiled *pRe){ |
| 596 | 596 | if( pRe ){ |
| 597 | 597 | fossil_free(pRe->aOp); |
| 598 | 598 | fossil_free(pRe->aArg); |
| 599 | + fossil_free(pRe); | |
| 599 | 600 | } |
| 600 | 601 | } |
| 601 | 602 | |
| 602 | 603 | /* |
| 603 | 604 | ** Compile a textual regular expression in zIn[] into a compiled regular |
| 604 | -** expression suitable for us by re_execute() and return a pointer to the | |
| | 605 | +** expression suitable for use by re_match() and return a pointer to the | |
| 605 | 606 | ** compiled regular expression in *ppRe. Return NULL on success or an |
| 606 | 607 | ** error message if something goes wrong. |
| 607 | 608 | */ |
| 608 | 609 | const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ |
| 609 | 610 | ReCompiled *pRe; |
| @@ -626,17 +627,17 @@ | ||
| 626 | 627 | }else{ |
| 627 | 628 | re_append(pRe, RE_OP_ANYSTAR, 0); |
| 628 | 629 | } |
| 629 | 630 | pRe->sIn.z = (unsigned char*)zIn; |
| 630 | 631 | pRe->sIn.i = 0; |
| 631 | - pRe->sIn.mx = strlen(zIn); | |
| 632 | + pRe->sIn.mx = strlen(pRe->sIn.z); | |
| 632 | 633 | zErr = re_subcompile_re(pRe); |
| 633 | 634 | if( zErr ){ |
| 634 | 635 | re_free(pRe); |
| 635 | 636 | return zErr; |
| 636 | 637 | } |
| 637 | - if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){ | |
| 638 | + if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ | |
| 638 | 639 | re_append(pRe, RE_OP_MATCH, RE_EOF); |
| 639 | 640 | re_append(pRe, RE_OP_ACCEPT, 0); |
| 640 | 641 | *ppRe = pRe; |
| 641 | 642 | }else if( pRe->sIn.i>=pRe->sIn.mx ){ |
| 642 | 643 | re_append(pRe, RE_OP_ACCEPT, 0); |
| @@ -659,10 +660,11 @@ | ||
| 659 | 660 | pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); |
| 660 | 661 | }else{ |
| 661 | 662 | break; |
| 662 | 663 | } |
| 663 | 664 | } |
| 665 | + if( j>0 && pRe->zInit[j-1]==0 ) j--; | |
| 664 | 666 | pRe->nInit = j; |
| 665 | 667 | } |
| 666 | 668 | return pRe->zErr; |
| 667 | 669 | } |
| 668 | 670 | |
| @@ -700,11 +702,11 @@ | ||
| 700 | 702 | } |
| 701 | 703 | sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
| 702 | 704 | } |
| 703 | 705 | zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
| 704 | 706 | if( zStr!=0 ){ |
| 705 | - sqlite3_result_int(context, re_execute(pRe, zStr, -1)); | |
| 707 | + sqlite3_result_int(context, re_match(pRe, zStr, -1)); | |
| 706 | 708 | } |
| 707 | 709 | } |
| 708 | 710 | |
| 709 | 711 | /* |
| 710 | 712 | ** Invoke this routine in order to install the REGEXP function in an |
| @@ -731,11 +733,11 @@ | ||
| 731 | 733 | char zLine[2000]; |
| 732 | 734 | while( fgets(zLine, sizeof(zLine), in) ){ |
| 733 | 735 | ln++; |
| 734 | 736 | n = (int)strlen(zLine); |
| 735 | 737 | while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--; |
| 736 | - if( re_execute(pRe, (const unsigned char*)zLine, n) ){ | |
| 738 | + if( re_match(pRe, (const unsigned char*)zLine, n) ){ | |
| 737 | 739 | printf("%s:%d:%.*s\n", zFile, ln, n, zLine); |
| 738 | 740 | } |
| 739 | 741 | } |
| 740 | 742 | } |
| 741 | 743 | |
| 742 | 744 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -178,11 +178,11 @@ | |
| 178 | } |
| 179 | |
| 180 | /* Run a compiled regular expression on the zero-terminated input |
| 181 | ** string zIn[]. Return true on a match and false if there is no match. |
| 182 | */ |
| 183 | int re_execute(ReCompiled *pRe, const unsigned char *zIn, int nIn){ |
| 184 | ReStateSet aStateSet[2], *pThis, *pNext; |
| 185 | ReStateNumber aSpace[100]; |
| 186 | ReStateNumber *pToFree; |
| 187 | unsigned int i = 0; |
| 188 | unsigned int iSwap = 0; |
| @@ -194,16 +194,16 @@ | |
| 194 | in.z = zIn; |
| 195 | in.i = 0; |
| 196 | in.mx = nIn>=0 ? nIn : strlen((char const*)zIn); |
| 197 | if( pRe->nInit ){ |
| 198 | unsigned char x = pRe->zInit[0]; |
| 199 | while( in.i+pRe->nInit<in.mx |
| 200 | && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0) |
| 201 | ){ |
| 202 | in.i++; |
| 203 | } |
| 204 | if( in.i+pRe->nInit>=in.mx ) return 0; |
| 205 | } |
| 206 | if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ |
| 207 | pToFree = 0; |
| 208 | aStateSet[0].aState = aSpace; |
| 209 | }else{ |
| @@ -275,11 +275,11 @@ | |
| 275 | re_add_state(pThis, x+pRe->aArg[x]); |
| 276 | break; |
| 277 | } |
| 278 | case RE_OP_ACCEPT: { |
| 279 | rc = 1; |
| 280 | goto re_execute_end; |
| 281 | } |
| 282 | case RE_OP_CC_INC: |
| 283 | case RE_OP_CC_EXC: { |
| 284 | int j = 1; |
| 285 | int n = pRe->aArg[x]; |
| @@ -307,11 +307,11 @@ | |
| 307 | } |
| 308 | } |
| 309 | for(i=0; i<pNext->nState; i++){ |
| 310 | if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } |
| 311 | } |
| 312 | re_execute_end: |
| 313 | fossil_free(pToFree); |
| 314 | return rc; |
| 315 | } |
| 316 | |
| 317 | /* Resize the opcode and argument arrays for an RE under construction. |
| @@ -386,11 +386,11 @@ | |
| 386 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 387 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 388 | int i, v = 0; |
| 389 | char c; |
| 390 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 391 | c = p->sIn.z[0]; |
| 392 | if( c=='u' && p->sIn.i+5<p->sIn.mx ){ |
| 393 | v = 0; |
| 394 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 395 | if( re_hex(zIn[1],&v) |
| 396 | && re_hex(zIn[2],&v) |
| @@ -594,16 +594,17 @@ | |
| 594 | */ |
| 595 | void re_free(ReCompiled *pRe){ |
| 596 | if( pRe ){ |
| 597 | fossil_free(pRe->aOp); |
| 598 | fossil_free(pRe->aArg); |
| 599 | } |
| 600 | } |
| 601 | |
| 602 | /* |
| 603 | ** Compile a textual regular expression in zIn[] into a compiled regular |
| 604 | ** expression suitable for us by re_execute() and return a pointer to the |
| 605 | ** compiled regular expression in *ppRe. Return NULL on success or an |
| 606 | ** error message if something goes wrong. |
| 607 | */ |
| 608 | const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ |
| 609 | ReCompiled *pRe; |
| @@ -626,17 +627,17 @@ | |
| 626 | }else{ |
| 627 | re_append(pRe, RE_OP_ANYSTAR, 0); |
| 628 | } |
| 629 | pRe->sIn.z = (unsigned char*)zIn; |
| 630 | pRe->sIn.i = 0; |
| 631 | pRe->sIn.mx = strlen(zIn); |
| 632 | zErr = re_subcompile_re(pRe); |
| 633 | if( zErr ){ |
| 634 | re_free(pRe); |
| 635 | return zErr; |
| 636 | } |
| 637 | if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){ |
| 638 | re_append(pRe, RE_OP_MATCH, RE_EOF); |
| 639 | re_append(pRe, RE_OP_ACCEPT, 0); |
| 640 | *ppRe = pRe; |
| 641 | }else if( pRe->sIn.i>=pRe->sIn.mx ){ |
| 642 | re_append(pRe, RE_OP_ACCEPT, 0); |
| @@ -659,10 +660,11 @@ | |
| 659 | pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); |
| 660 | }else{ |
| 661 | break; |
| 662 | } |
| 663 | } |
| 664 | pRe->nInit = j; |
| 665 | } |
| 666 | return pRe->zErr; |
| 667 | } |
| 668 | |
| @@ -700,11 +702,11 @@ | |
| 700 | } |
| 701 | sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
| 702 | } |
| 703 | zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
| 704 | if( zStr!=0 ){ |
| 705 | sqlite3_result_int(context, re_execute(pRe, zStr, -1)); |
| 706 | } |
| 707 | } |
| 708 | |
| 709 | /* |
| 710 | ** Invoke this routine in order to install the REGEXP function in an |
| @@ -731,11 +733,11 @@ | |
| 731 | char zLine[2000]; |
| 732 | while( fgets(zLine, sizeof(zLine), in) ){ |
| 733 | ln++; |
| 734 | n = (int)strlen(zLine); |
| 735 | while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--; |
| 736 | if( re_execute(pRe, (const unsigned char*)zLine, n) ){ |
| 737 | printf("%s:%d:%.*s\n", zFile, ln, n, zLine); |
| 738 | } |
| 739 | } |
| 740 | } |
| 741 | |
| 742 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -178,11 +178,11 @@ | |
| 178 | } |
| 179 | |
| 180 | /* Run a compiled regular expression on the zero-terminated input |
| 181 | ** string zIn[]. Return true on a match and false if there is no match. |
| 182 | */ |
| 183 | int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ |
| 184 | ReStateSet aStateSet[2], *pThis, *pNext; |
| 185 | ReStateNumber aSpace[100]; |
| 186 | ReStateNumber *pToFree; |
| 187 | unsigned int i = 0; |
| 188 | unsigned int iSwap = 0; |
| @@ -194,16 +194,16 @@ | |
| 194 | in.z = zIn; |
| 195 | in.i = 0; |
| 196 | in.mx = nIn>=0 ? nIn : strlen((char const*)zIn); |
| 197 | if( pRe->nInit ){ |
| 198 | unsigned char x = pRe->zInit[0]; |
| 199 | while( in.i+pRe->nInit<=in.mx |
| 200 | && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0) |
| 201 | ){ |
| 202 | in.i++; |
| 203 | } |
| 204 | if( in.i+pRe->nInit>in.mx ) return 0; |
| 205 | } |
| 206 | if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ |
| 207 | pToFree = 0; |
| 208 | aStateSet[0].aState = aSpace; |
| 209 | }else{ |
| @@ -275,11 +275,11 @@ | |
| 275 | re_add_state(pThis, x+pRe->aArg[x]); |
| 276 | break; |
| 277 | } |
| 278 | case RE_OP_ACCEPT: { |
| 279 | rc = 1; |
| 280 | goto re_match_end; |
| 281 | } |
| 282 | case RE_OP_CC_INC: |
| 283 | case RE_OP_CC_EXC: { |
| 284 | int j = 1; |
| 285 | int n = pRe->aArg[x]; |
| @@ -307,11 +307,11 @@ | |
| 307 | } |
| 308 | } |
| 309 | for(i=0; i<pNext->nState; i++){ |
| 310 | if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } |
| 311 | } |
| 312 | re_match_end: |
| 313 | fossil_free(pToFree); |
| 314 | return rc; |
| 315 | } |
| 316 | |
| 317 | /* Resize the opcode and argument arrays for an RE under construction. |
| @@ -386,11 +386,11 @@ | |
| 386 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 387 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 388 | int i, v = 0; |
| 389 | char c; |
| 390 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 391 | c = p->sIn.z[p->sIn.i]; |
| 392 | if( c=='u' && p->sIn.i+5<p->sIn.mx ){ |
| 393 | v = 0; |
| 394 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 395 | if( re_hex(zIn[1],&v) |
| 396 | && re_hex(zIn[2],&v) |
| @@ -594,16 +594,17 @@ | |
| 594 | */ |
| 595 | void re_free(ReCompiled *pRe){ |
| 596 | if( pRe ){ |
| 597 | fossil_free(pRe->aOp); |
| 598 | fossil_free(pRe->aArg); |
| 599 | fossil_free(pRe); |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | /* |
| 604 | ** Compile a textual regular expression in zIn[] into a compiled regular |
| 605 | ** expression suitable for use by re_match() and return a pointer to the |
| 606 | ** compiled regular expression in *ppRe. Return NULL on success or an |
| 607 | ** error message if something goes wrong. |
| 608 | */ |
| 609 | const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ |
| 610 | ReCompiled *pRe; |
| @@ -626,17 +627,17 @@ | |
| 627 | }else{ |
| 628 | re_append(pRe, RE_OP_ANYSTAR, 0); |
| 629 | } |
| 630 | pRe->sIn.z = (unsigned char*)zIn; |
| 631 | pRe->sIn.i = 0; |
| 632 | pRe->sIn.mx = strlen(pRe->sIn.z); |
| 633 | zErr = re_subcompile_re(pRe); |
| 634 | if( zErr ){ |
| 635 | re_free(pRe); |
| 636 | return zErr; |
| 637 | } |
| 638 | if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ |
| 639 | re_append(pRe, RE_OP_MATCH, RE_EOF); |
| 640 | re_append(pRe, RE_OP_ACCEPT, 0); |
| 641 | *ppRe = pRe; |
| 642 | }else if( pRe->sIn.i>=pRe->sIn.mx ){ |
| 643 | re_append(pRe, RE_OP_ACCEPT, 0); |
| @@ -659,10 +660,11 @@ | |
| 660 | pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); |
| 661 | }else{ |
| 662 | break; |
| 663 | } |
| 664 | } |
| 665 | if( j>0 && pRe->zInit[j-1]==0 ) j--; |
| 666 | pRe->nInit = j; |
| 667 | } |
| 668 | return pRe->zErr; |
| 669 | } |
| 670 | |
| @@ -700,11 +702,11 @@ | |
| 702 | } |
| 703 | sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
| 704 | } |
| 705 | zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
| 706 | if( zStr!=0 ){ |
| 707 | sqlite3_result_int(context, re_match(pRe, zStr, -1)); |
| 708 | } |
| 709 | } |
| 710 | |
| 711 | /* |
| 712 | ** Invoke this routine in order to install the REGEXP function in an |
| @@ -731,11 +733,11 @@ | |
| 733 | char zLine[2000]; |
| 734 | while( fgets(zLine, sizeof(zLine), in) ){ |
| 735 | ln++; |
| 736 | n = (int)strlen(zLine); |
| 737 | while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--; |
| 738 | if( re_match(pRe, (const unsigned char*)zLine, n) ){ |
| 739 | printf("%s:%d:%.*s\n", zFile, ln, n, zLine); |
| 740 | } |
| 741 | } |
| 742 | } |
| 743 | |
| 744 |