Fossil SCM

Rename re_execute() to re_match(). Also fix several regex matcher bugs that were introduced when porting the code from SQLite.

drh 2013-01-03 20:02 trunk
Commit c84051f38db377f5bd7843b9f72fb0b5ca38f5c6
2 files changed +1 -1 +13 -11
+1 -1
--- src/diff.c
+++ src/diff.c
@@ -423,11 +423,11 @@
423423
ReCompiled *pRe, /* The regular expression to be matched */
424424
DLine *aDLine, /* First of N DLines to compare against */
425425
int N /* Number of DLines to check */
426426
){
427427
while( N-- ){
428
- if( re_execute(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){
428
+ if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){
429429
return 1;
430430
}
431431
aDLine++;
432432
}
433433
return 0;
434434
--- src/diff.c
+++ src/diff.c
@@ -423,11 +423,11 @@
423 ReCompiled *pRe, /* The regular expression to be matched */
424 DLine *aDLine, /* First of N DLines to compare against */
425 int N /* Number of DLines to check */
426 ){
427 while( N-- ){
428 if( re_execute(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){
429 return 1;
430 }
431 aDLine++;
432 }
433 return 0;
434
--- src/diff.c
+++ src/diff.c
@@ -423,11 +423,11 @@
423 ReCompiled *pRe, /* The regular expression to be matched */
424 DLine *aDLine, /* First of N DLines to compare against */
425 int N /* Number of DLines to check */
426 ){
427 while( N-- ){
428 if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){
429 return 1;
430 }
431 aDLine++;
432 }
433 return 0;
434
+13 -11
--- src/regexp.c
+++ src/regexp.c
@@ -178,11 +178,11 @@
178178
}
179179
180180
/* Run a compiled regular expression on the zero-terminated input
181181
** string zIn[]. Return true on a match and false if there is no match.
182182
*/
183
-int re_execute(ReCompiled *pRe, const unsigned char *zIn, int nIn){
183
+int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
184184
ReStateSet aStateSet[2], *pThis, *pNext;
185185
ReStateNumber aSpace[100];
186186
ReStateNumber *pToFree;
187187
unsigned int i = 0;
188188
unsigned int iSwap = 0;
@@ -194,16 +194,16 @@
194194
in.z = zIn;
195195
in.i = 0;
196196
in.mx = nIn>=0 ? nIn : strlen((char const*)zIn);
197197
if( pRe->nInit ){
198198
unsigned char x = pRe->zInit[0];
199
- while( in.i+pRe->nInit<in.mx
199
+ while( in.i+pRe->nInit<=in.mx
200200
&& (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201201
){
202202
in.i++;
203203
}
204
- if( in.i+pRe->nInit>=in.mx ) return 0;
204
+ if( in.i+pRe->nInit>in.mx ) return 0;
205205
}
206206
if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
207207
pToFree = 0;
208208
aStateSet[0].aState = aSpace;
209209
}else{
@@ -275,11 +275,11 @@
275275
re_add_state(pThis, x+pRe->aArg[x]);
276276
break;
277277
}
278278
case RE_OP_ACCEPT: {
279279
rc = 1;
280
- goto re_execute_end;
280
+ goto re_match_end;
281281
}
282282
case RE_OP_CC_INC:
283283
case RE_OP_CC_EXC: {
284284
int j = 1;
285285
int n = pRe->aArg[x];
@@ -307,11 +307,11 @@
307307
}
308308
}
309309
for(i=0; i<pNext->nState; i++){
310310
if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; }
311311
}
312
-re_execute_end:
312
+re_match_end:
313313
fossil_free(pToFree);
314314
return rc;
315315
}
316316
317317
/* Resize the opcode and argument arrays for an RE under construction.
@@ -386,11 +386,11 @@
386386
static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
387387
static const char zTrans[] = "\a\f\n\r\t\v";
388388
int i, v = 0;
389389
char c;
390390
if( p->sIn.i>=p->sIn.mx ) return 0;
391
- c = p->sIn.z[0];
391
+ c = p->sIn.z[p->sIn.i];
392392
if( c=='u' && p->sIn.i+5<p->sIn.mx ){
393393
v = 0;
394394
const unsigned char *zIn = p->sIn.z + p->sIn.i;
395395
if( re_hex(zIn[1],&v)
396396
&& re_hex(zIn[2],&v)
@@ -594,16 +594,17 @@
594594
*/
595595
void re_free(ReCompiled *pRe){
596596
if( pRe ){
597597
fossil_free(pRe->aOp);
598598
fossil_free(pRe->aArg);
599
+ fossil_free(pRe);
599600
}
600601
}
601602
602603
/*
603604
** Compile a textual regular expression in zIn[] into a compiled regular
604
-** expression suitable for us by re_execute() and return a pointer to the
605
+** expression suitable for us by re_match() and return a pointer to the
605606
** compiled regular expression in *ppRe. Return NULL on success or an
606607
** error message if something goes wrong.
607608
*/
608609
const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
609610
ReCompiled *pRe;
@@ -626,17 +627,17 @@
626627
}else{
627628
re_append(pRe, RE_OP_ANYSTAR, 0);
628629
}
629630
pRe->sIn.z = (unsigned char*)zIn;
630631
pRe->sIn.i = 0;
631
- pRe->sIn.mx = strlen(zIn);
632
+ pRe->sIn.mx = strlen(pRe->sIn.z);
632633
zErr = re_subcompile_re(pRe);
633634
if( zErr ){
634635
re_free(pRe);
635636
return zErr;
636637
}
637
- if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
638
+ if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
638639
re_append(pRe, RE_OP_MATCH, RE_EOF);
639640
re_append(pRe, RE_OP_ACCEPT, 0);
640641
*ppRe = pRe;
641642
}else if( pRe->sIn.i>=pRe->sIn.mx ){
642643
re_append(pRe, RE_OP_ACCEPT, 0);
@@ -659,10 +660,11 @@
659660
pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
660661
}else{
661662
break;
662663
}
663664
}
665
+ if( j>0 && pRe->zInit[j-1]==0 ) j--;
664666
pRe->nInit = j;
665667
}
666668
return pRe->zErr;
667669
}
668670
@@ -700,11 +702,11 @@
700702
}
701703
sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
702704
}
703705
zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
704706
if( zStr!=0 ){
705
- sqlite3_result_int(context, re_execute(pRe, zStr, -1));
707
+ sqlite3_result_int(context, re_match(pRe, zStr, -1));
706708
}
707709
}
708710
709711
/*
710712
** Invoke this routine in order to install the REGEXP function in an
@@ -731,11 +733,11 @@
731733
char zLine[2000];
732734
while( fgets(zLine, sizeof(zLine), in) ){
733735
ln++;
734736
n = (int)strlen(zLine);
735737
while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
736
- if( re_execute(pRe, (const unsigned char*)zLine, n) ){
738
+ if( re_match(pRe, (const unsigned char*)zLine, n) ){
737739
printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
738740
}
739741
}
740742
}
741743
742744
--- src/regexp.c
+++ src/regexp.c
@@ -178,11 +178,11 @@
178 }
179
180 /* Run a compiled regular expression on the zero-terminated input
181 ** string zIn[]. Return true on a match and false if there is no match.
182 */
183 int re_execute(ReCompiled *pRe, const unsigned char *zIn, int nIn){
184 ReStateSet aStateSet[2], *pThis, *pNext;
185 ReStateNumber aSpace[100];
186 ReStateNumber *pToFree;
187 unsigned int i = 0;
188 unsigned int iSwap = 0;
@@ -194,16 +194,16 @@
194 in.z = zIn;
195 in.i = 0;
196 in.mx = nIn>=0 ? nIn : strlen((char const*)zIn);
197 if( pRe->nInit ){
198 unsigned char x = pRe->zInit[0];
199 while( in.i+pRe->nInit<in.mx
200 && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201 ){
202 in.i++;
203 }
204 if( in.i+pRe->nInit>=in.mx ) return 0;
205 }
206 if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
207 pToFree = 0;
208 aStateSet[0].aState = aSpace;
209 }else{
@@ -275,11 +275,11 @@
275 re_add_state(pThis, x+pRe->aArg[x]);
276 break;
277 }
278 case RE_OP_ACCEPT: {
279 rc = 1;
280 goto re_execute_end;
281 }
282 case RE_OP_CC_INC:
283 case RE_OP_CC_EXC: {
284 int j = 1;
285 int n = pRe->aArg[x];
@@ -307,11 +307,11 @@
307 }
308 }
309 for(i=0; i<pNext->nState; i++){
310 if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; }
311 }
312 re_execute_end:
313 fossil_free(pToFree);
314 return rc;
315 }
316
317 /* Resize the opcode and argument arrays for an RE under construction.
@@ -386,11 +386,11 @@
386 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
387 static const char zTrans[] = "\a\f\n\r\t\v";
388 int i, v = 0;
389 char c;
390 if( p->sIn.i>=p->sIn.mx ) return 0;
391 c = p->sIn.z[0];
392 if( c=='u' && p->sIn.i+5<p->sIn.mx ){
393 v = 0;
394 const unsigned char *zIn = p->sIn.z + p->sIn.i;
395 if( re_hex(zIn[1],&v)
396 && re_hex(zIn[2],&v)
@@ -594,16 +594,17 @@
594 */
595 void re_free(ReCompiled *pRe){
596 if( pRe ){
597 fossil_free(pRe->aOp);
598 fossil_free(pRe->aArg);
 
599 }
600 }
601
602 /*
603 ** Compile a textual regular expression in zIn[] into a compiled regular
604 ** expression suitable for us by re_execute() and return a pointer to the
605 ** compiled regular expression in *ppRe. Return NULL on success or an
606 ** error message if something goes wrong.
607 */
608 const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
609 ReCompiled *pRe;
@@ -626,17 +627,17 @@
626 }else{
627 re_append(pRe, RE_OP_ANYSTAR, 0);
628 }
629 pRe->sIn.z = (unsigned char*)zIn;
630 pRe->sIn.i = 0;
631 pRe->sIn.mx = strlen(zIn);
632 zErr = re_subcompile_re(pRe);
633 if( zErr ){
634 re_free(pRe);
635 return zErr;
636 }
637 if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
638 re_append(pRe, RE_OP_MATCH, RE_EOF);
639 re_append(pRe, RE_OP_ACCEPT, 0);
640 *ppRe = pRe;
641 }else if( pRe->sIn.i>=pRe->sIn.mx ){
642 re_append(pRe, RE_OP_ACCEPT, 0);
@@ -659,10 +660,11 @@
659 pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
660 }else{
661 break;
662 }
663 }
 
664 pRe->nInit = j;
665 }
666 return pRe->zErr;
667 }
668
@@ -700,11 +702,11 @@
700 }
701 sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
702 }
703 zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
704 if( zStr!=0 ){
705 sqlite3_result_int(context, re_execute(pRe, zStr, -1));
706 }
707 }
708
709 /*
710 ** Invoke this routine in order to install the REGEXP function in an
@@ -731,11 +733,11 @@
731 char zLine[2000];
732 while( fgets(zLine, sizeof(zLine), in) ){
733 ln++;
734 n = (int)strlen(zLine);
735 while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
736 if( re_execute(pRe, (const unsigned char*)zLine, n) ){
737 printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
738 }
739 }
740 }
741
742
--- src/regexp.c
+++ src/regexp.c
@@ -178,11 +178,11 @@
178 }
179
180 /* Run a compiled regular expression on the zero-terminated input
181 ** string zIn[]. Return true on a match and false if there is no match.
182 */
183 int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
184 ReStateSet aStateSet[2], *pThis, *pNext;
185 ReStateNumber aSpace[100];
186 ReStateNumber *pToFree;
187 unsigned int i = 0;
188 unsigned int iSwap = 0;
@@ -194,16 +194,16 @@
194 in.z = zIn;
195 in.i = 0;
196 in.mx = nIn>=0 ? nIn : strlen((char const*)zIn);
197 if( pRe->nInit ){
198 unsigned char x = pRe->zInit[0];
199 while( in.i+pRe->nInit<=in.mx
200 && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201 ){
202 in.i++;
203 }
204 if( in.i+pRe->nInit>in.mx ) return 0;
205 }
206 if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
207 pToFree = 0;
208 aStateSet[0].aState = aSpace;
209 }else{
@@ -275,11 +275,11 @@
275 re_add_state(pThis, x+pRe->aArg[x]);
276 break;
277 }
278 case RE_OP_ACCEPT: {
279 rc = 1;
280 goto re_match_end;
281 }
282 case RE_OP_CC_INC:
283 case RE_OP_CC_EXC: {
284 int j = 1;
285 int n = pRe->aArg[x];
@@ -307,11 +307,11 @@
307 }
308 }
309 for(i=0; i<pNext->nState; i++){
310 if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; }
311 }
312 re_match_end:
313 fossil_free(pToFree);
314 return rc;
315 }
316
317 /* Resize the opcode and argument arrays for an RE under construction.
@@ -386,11 +386,11 @@
386 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
387 static const char zTrans[] = "\a\f\n\r\t\v";
388 int i, v = 0;
389 char c;
390 if( p->sIn.i>=p->sIn.mx ) return 0;
391 c = p->sIn.z[p->sIn.i];
392 if( c=='u' && p->sIn.i+5<p->sIn.mx ){
393 v = 0;
394 const unsigned char *zIn = p->sIn.z + p->sIn.i;
395 if( re_hex(zIn[1],&v)
396 && re_hex(zIn[2],&v)
@@ -594,16 +594,17 @@
594 */
595 void re_free(ReCompiled *pRe){
596 if( pRe ){
597 fossil_free(pRe->aOp);
598 fossil_free(pRe->aArg);
599 fossil_free(pRe);
600 }
601 }
602
603 /*
604 ** Compile a textual regular expression in zIn[] into a compiled regular
605 ** expression suitable for us by re_match() and return a pointer to the
606 ** compiled regular expression in *ppRe. Return NULL on success or an
607 ** error message if something goes wrong.
608 */
609 const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
610 ReCompiled *pRe;
@@ -626,17 +627,17 @@
627 }else{
628 re_append(pRe, RE_OP_ANYSTAR, 0);
629 }
630 pRe->sIn.z = (unsigned char*)zIn;
631 pRe->sIn.i = 0;
632 pRe->sIn.mx = strlen(pRe->sIn.z);
633 zErr = re_subcompile_re(pRe);
634 if( zErr ){
635 re_free(pRe);
636 return zErr;
637 }
638 if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
639 re_append(pRe, RE_OP_MATCH, RE_EOF);
640 re_append(pRe, RE_OP_ACCEPT, 0);
641 *ppRe = pRe;
642 }else if( pRe->sIn.i>=pRe->sIn.mx ){
643 re_append(pRe, RE_OP_ACCEPT, 0);
@@ -659,10 +660,11 @@
660 pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
661 }else{
662 break;
663 }
664 }
665 if( j>0 && pRe->zInit[j-1]==0 ) j--;
666 pRe->nInit = j;
667 }
668 return pRe->zErr;
669 }
670
@@ -700,11 +702,11 @@
702 }
703 sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
704 }
705 zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
706 if( zStr!=0 ){
707 sqlite3_result_int(context, re_match(pRe, zStr, -1));
708 }
709 }
710
711 /*
712 ** Invoke this routine in order to install the REGEXP function in an
@@ -731,11 +733,11 @@
733 char zLine[2000];
734 while( fgets(zLine, sizeof(zLine), in) ){
735 ln++;
736 n = (int)strlen(zLine);
737 while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
738 if( re_match(pRe, (const unsigned char*)zLine, n) ){
739 printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
740 }
741 }
742 }
743
744

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button