Fossil SCM

Update the regexp implementation with the latest fixes from the SQLite trunk. Add the regexpi() SQL function.

drh 2021-06-03 13:55 trunk
Commit 1f3a053b1613f2c69b158928536851b0dea9942f8155bc63e837d86a5015078b
1 file changed +13 -5
+13 -5
--- src/regexp.c
+++ src/regexp.c
@@ -197,11 +197,11 @@
197197
case RE_OP_MATCH: {
198198
if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);
199199
break;
200200
}
201201
case RE_OP_ANY: {
202
- re_add_state(pNext, x+1);
202
+ if( c!=0 ) re_add_state(pNext, x+1);
203203
break;
204204
}
205205
case RE_OP_WORD: {
206206
if( re_word_char(c) ) re_add_state(pNext, x+1);
207207
break;
@@ -622,11 +622,11 @@
622622
** zInit[]. The re_match() routine can then search ahead in the input
623623
** string looking for the initial match without having to run the whole
624624
** regex engine over the string. Do not worry about trying to match
625625
** unicode characters beyond plane 0 - those are very rare and this is
626626
** just an optimization. */
627
- if( pRe->aOp[0]==RE_OP_ANYSTAR ){
627
+ if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){
628628
for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
629629
unsigned x = pRe->aArg[i];
630630
if( x<=127 ){
631631
pRe->zInit[j++] = (unsigned char)x;
632632
}else if( x<=0xfff ){
@@ -668,11 +668,11 @@
668668
669669
pRe = sqlite3_get_auxdata(context, 0);
670670
if( pRe==0 ){
671671
zPattern = (const char*)sqlite3_value_text(argv[0]);
672672
if( zPattern==0 ) return;
673
- zErr = re_compile(&pRe, zPattern, 0);
673
+ zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
674674
if( zErr ){
675675
re_free(pRe);
676676
sqlite3_result_error(context, zErr, -1);
677677
return;
678678
}
@@ -694,12 +694,20 @@
694694
/*
695695
** Invoke this routine to register the regexp() function with the
696696
** SQLite database connection.
697697
*/
698698
int re_add_sql_func(sqlite3 *db){
699
- return sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0,
700
- re_sql_func, 0, 0);
699
+ int rc;
700
+ rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8|SQLITE_INNOCUOUS,
701
+ 0, re_sql_func, 0, 0);
702
+ if( rc==SQLITE_OK ){
703
+ /* The regexpi(PATTERN,STRING) function is a case-insensitive version
704
+ ** of regexp(PATTERN,STRING). */
705
+ rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8|SQLITE_INNOCUOUS,
706
+ (void*)db, re_sql_func, 0, 0);
707
+ }
708
+ return rc;
701709
}
702710
703711
/*
704712
** Run a "grep" over a single file read from disk.
705713
*/
706714
--- src/regexp.c
+++ src/regexp.c
@@ -197,11 +197,11 @@
197 case RE_OP_MATCH: {
198 if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);
199 break;
200 }
201 case RE_OP_ANY: {
202 re_add_state(pNext, x+1);
203 break;
204 }
205 case RE_OP_WORD: {
206 if( re_word_char(c) ) re_add_state(pNext, x+1);
207 break;
@@ -622,11 +622,11 @@
622 ** zInit[]. The re_match() routine can then search ahead in the input
623 ** string looking for the initial match without having to run the whole
624 ** regex engine over the string. Do not worry about trying to match
625 ** unicode characters beyond plane 0 - those are very rare and this is
626 ** just an optimization. */
627 if( pRe->aOp[0]==RE_OP_ANYSTAR ){
628 for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
629 unsigned x = pRe->aArg[i];
630 if( x<=127 ){
631 pRe->zInit[j++] = (unsigned char)x;
632 }else if( x<=0xfff ){
@@ -668,11 +668,11 @@
668
669 pRe = sqlite3_get_auxdata(context, 0);
670 if( pRe==0 ){
671 zPattern = (const char*)sqlite3_value_text(argv[0]);
672 if( zPattern==0 ) return;
673 zErr = re_compile(&pRe, zPattern, 0);
674 if( zErr ){
675 re_free(pRe);
676 sqlite3_result_error(context, zErr, -1);
677 return;
678 }
@@ -694,12 +694,20 @@
694 /*
695 ** Invoke this routine to register the regexp() function with the
696 ** SQLite database connection.
697 */
698 int re_add_sql_func(sqlite3 *db){
699 return sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0,
700 re_sql_func, 0, 0);
 
 
 
 
 
 
 
 
701 }
702
703 /*
704 ** Run a "grep" over a single file read from disk.
705 */
706
--- src/regexp.c
+++ src/regexp.c
@@ -197,11 +197,11 @@
197 case RE_OP_MATCH: {
198 if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);
199 break;
200 }
201 case RE_OP_ANY: {
202 if( c!=0 ) re_add_state(pNext, x+1);
203 break;
204 }
205 case RE_OP_WORD: {
206 if( re_word_char(c) ) re_add_state(pNext, x+1);
207 break;
@@ -622,11 +622,11 @@
622 ** zInit[]. The re_match() routine can then search ahead in the input
623 ** string looking for the initial match without having to run the whole
624 ** regex engine over the string. Do not worry about trying to match
625 ** unicode characters beyond plane 0 - those are very rare and this is
626 ** just an optimization. */
627 if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){
628 for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
629 unsigned x = pRe->aArg[i];
630 if( x<=127 ){
631 pRe->zInit[j++] = (unsigned char)x;
632 }else if( x<=0xfff ){
@@ -668,11 +668,11 @@
668
669 pRe = sqlite3_get_auxdata(context, 0);
670 if( pRe==0 ){
671 zPattern = (const char*)sqlite3_value_text(argv[0]);
672 if( zPattern==0 ) return;
673 zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
674 if( zErr ){
675 re_free(pRe);
676 sqlite3_result_error(context, zErr, -1);
677 return;
678 }
@@ -694,12 +694,20 @@
694 /*
695 ** Invoke this routine to register the regexp() function with the
696 ** SQLite database connection.
697 */
698 int re_add_sql_func(sqlite3 *db){
699 int rc;
700 rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8|SQLITE_INNOCUOUS,
701 0, re_sql_func, 0, 0);
702 if( rc==SQLITE_OK ){
703 /* The regexpi(PATTERN,STRING) function is a case-insensitive version
704 ** of regexp(PATTERN,STRING). */
705 rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8|SQLITE_INNOCUOUS,
706 (void*)db, re_sql_func, 0, 0);
707 }
708 return rc;
709 }
710
711 /*
712 ** Run a "grep" over a single file read from disk.
713 */
714

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button