| | @@ -53,18 +53,16 @@ |
| 53 | 53 | ** expression and M is the size of the input string. The matcher never |
| 54 | 54 | ** exhibits exponential behavior. Note that the X{p,q} operator expands |
| 55 | 55 | ** to p copies of X followed by q-p copies of X? and that the size of the |
| 56 | 56 | ** regular expression in the O(N*M) performance bound is computed after |
| 57 | 57 | ** this expansion. |
| 58 | +** |
| 59 | +** To help prevent DoS attacks, the maximum size of the NFA is restricted. |
| 58 | 60 | */ |
| 59 | 61 | #include "config.h" |
| 60 | 62 | #include "regexp.h" |
| 61 | 63 | |
| 62 | | -#ifndef SQLITE_MAX_REGEXP_REPEAT |
| 63 | | -# define SQLITE_MAX_REGEXP_REPEAT 999 |
| 64 | | -#endif |
| 65 | | - |
| 66 | 64 | /* The end-of-input character */ |
| 67 | 65 | #define RE_EOF 0 /* End of input */ |
| 68 | 66 | #define RE_START 0xfffffff /* Start of input - larger than any UTF-8 character */ |
| 69 | 67 | |
| 70 | 68 | /* The NFA is implemented as a sequence of opcodes taken from the following |
| | @@ -119,11 +117,11 @@ |
| 119 | 117 | const char *zErr; /* Error message to return */ |
| 120 | 118 | char *aOp; /* Operators for the virtual machine */ |
| 121 | 119 | int *aArg; /* Arguments to each operator */ |
| 122 | 120 | unsigned (*xNextChar)(ReInput*); /* Next character function */ |
| 123 | 121 | unsigned char zInit[12]; /* Initial text to match */ |
| 124 | | - int nInit; /* Number of characters in zInit */ |
| 122 | + int nInit; /* Number of bytes in zInit */ |
| 125 | 123 | unsigned nState; /* Number of entries in aOp[] and aArg[] */ |
| 126 | 124 | unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ |
| 127 | 125 | unsigned mxAlloc; /* Complexity limit */ |
| 128 | 126 | }; |
| 129 | 127 | #endif |
| | @@ -151,11 +149,11 @@ |
| 151 | 149 | }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 152 | 150 | && (p->z[p->i+1]&0xc0)==0x80 ){ |
| 153 | 151 | c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); |
| 154 | 152 | p->i += 2; |
| 155 | 153 | if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; |
| 156 | | - }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 154 | + }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 157 | 155 | && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ |
| 158 | 156 | c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) |
| 159 | 157 | | (p->z[p->i+2]&0x3f); |
| 160 | 158 | p->i += 3; |
| 161 | 159 | if( c<=0xffff || c>0x10ffff ) c = 0xfffd; |
| | @@ -296,13 +294,13 @@ |
| 296 | 294 | rc = 1; |
| 297 | 295 | goto re_match_end; |
| 298 | 296 | } |
| 299 | 297 | case RE_OP_CC_EXC: { |
| 300 | 298 | if( c==0 ) break; |
| 301 | | - /* fall-through */ |
| 299 | + /* fall-through */ goto re_op_cc_inc; |
| 302 | 300 | } |
| 303 | | - case RE_OP_CC_INC: { |
| 301 | + case RE_OP_CC_INC: re_op_cc_inc: { |
| 304 | 302 | int j = 1; |
| 305 | 303 | int n = pRe->aArg[x]; |
| 306 | 304 | int hit = 0; |
| 307 | 305 | for(j=1; j>0 && j<n; j++){ |
| 308 | 306 | if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){ |
| | @@ -547,11 +545,11 @@ |
| 547 | 545 | if( n*2>p->mxAlloc ) return "REGEXP pattern too big"; |
| 548 | 546 | p->sIn.i++; |
| 549 | 547 | } |
| 550 | 548 | } |
| 551 | 549 | if( c!='}' ) return "unmatched '{'"; |
| 552 | | - if( n>0 && n<m ) return "n less than m in '{m,n}'"; |
| 550 | + if( n<m ) return "n less than m in '{m,n}'"; |
| 553 | 551 | p->sIn.i++; |
| 554 | 552 | sz = p->nState - iPrev; |
| 555 | 553 | if( m==0 ){ |
| 556 | 554 | if( n==0 ) return "both m and n are zero in '{m,n}'"; |
| 557 | 555 | re_insert(p, iPrev, RE_OP_FORK, sz+1); |
| | @@ -643,11 +641,11 @@ |
| 643 | 641 | ** Compile a textual regular expression in zIn[] into a compiled regular |
| 644 | 642 | ** expression suitable for use by re_match() and return a pointer to the |
| 645 | 643 | ** compiled regular expression in *ppRe. Return NULL on success or an |
| 646 | 644 | ** error message if something goes wrong. |
| 647 | 645 | */ |
| 648 | | -const char *re_compile( |
| 646 | +static const char *re_compile( |
| 649 | 647 | ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
| 650 | 648 | const char *zIn, /* Input regular expression */ |
| 651 | 649 | int mxRe, /* Complexity limit */ |
| 652 | 650 | int noCase /* True for caseless comparisons */ |
| 653 | 651 | ){ |
| | @@ -716,10 +714,79 @@ |
| 716 | 714 | if( j>0 && pRe->zInit[j-1]==0 ) j--; |
| 717 | 715 | pRe->nInit = j; |
| 718 | 716 | } |
| 719 | 717 | return pRe->zErr; |
| 720 | 718 | } |
| 719 | + |
| 720 | +/* |
| 721 | +** Implementation of the regexp() SQL function. This function implements |
| 722 | +** the built-in REGEXP operator. The first argument to the function is the |
| 723 | +** pattern and the second argument is the string. So, the SQL statements: |
| 724 | +** |
| 725 | +** A REGEXP B |
| 726 | +** |
| 727 | +** is implemented as regexp(B,A). |
| 728 | +*/ |
| 729 | +static void re_sql_func( |
| 730 | + sqlite3_context *context, |
| 731 | + int argc, |
| 732 | + sqlite3_value **argv |
| 733 | +){ |
| 734 | + ReCompiled *pRe; /* Compiled regular expression */ |
| 735 | + const char *zPattern; /* The regular expression */ |
| 736 | + const unsigned char *zStr;/* String being searched */ |
| 737 | + const char *zErr; /* Compile error message */ |
| 738 | + int setAux = 0; /* True to invoke sqlite3_set_auxdata() */ |
| 739 | + |
| 740 | + (void)argc; /* Unused */ |
| 741 | + pRe = sqlite3_get_auxdata(context, 0); |
| 742 | + if( pRe==0 ){ |
| 743 | + zPattern = (const char*)sqlite3_value_text(argv[0]); |
| 744 | + if( zPattern==0 ) return; |
| 745 | + zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); |
| 746 | + if( zErr ){ |
| 747 | + re_free(pRe); |
| 748 | + /* The original SQLite function from which this code was copied raises |
| 749 | + ** an error if the REGEXP contained a syntax error. This variant |
| 750 | + ** silently fails to match, as that works better for Fossil. |
| 751 | + ** sqlite3_result_error(context, zErr, -1); */ |
| 752 | + sqlite3_result_int(context, 0); |
| 753 | + return; |
| 754 | + } |
| 755 | + if( pRe==0 ){ |
| 756 | + sqlite3_result_error_nomem(context); |
| 757 | + return; |
| 758 | + } |
| 759 | + setAux = 1; |
| 760 | + } |
| 761 | + zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
| 762 | + if( zStr!=0 ){ |
| 763 | + sqlite3_result_int(context, re_match(pRe, zStr, -1)); |
| 764 | + } |
| 765 | + if( setAux ){ |
| 766 | + sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
| 767 | + } |
| 768 | +} |
| 769 | + |
| 770 | +/* |
| 771 | +** Invoke this routine to register the regexp() function with the |
| 772 | +** SQLite database connection. |
| 773 | +*/ |
| 774 | +int re_add_sql_func(sqlite3 *db){ |
| 775 | + int rc; |
| 776 | + rc = sqlite3_create_function(db, "regexp", 2, |
| 777 | + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 778 | + 0, re_sql_func, 0, 0); |
| 779 | + if( rc==SQLITE_OK ){ |
| 780 | + /* The regexpi(PATTERN,STRING) function is a case-insensitive version |
| 781 | + ** of regexp(PATTERN,STRING). */ |
| 782 | + rc = sqlite3_create_function(db, "regexpi", 2, |
| 783 | + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 784 | + (void*)db, re_sql_func, 0, 0); |
| 785 | + } |
| 786 | + return rc; |
| 787 | +} |
| 721 | 788 | |
| 722 | 789 | /* |
| 723 | 790 | ** The input zIn is a string that we want to match exactly as part of |
| 724 | 791 | ** a regular expression. Return a new string (in space obtained from |
| 725 | 792 | ** fossil_malloc() or the equivalent) that escapes all regexp syntax |
| | @@ -764,92 +831,20 @@ |
| 764 | 831 | ** Limit the size of the bytecode used to implement a regular expression |
| 765 | 832 | ** to this many steps. It is important to limit this to avoid possible |
| 766 | 833 | ** DoS attacks. |
| 767 | 834 | */ |
| 768 | 835 | |
| 769 | | -/* |
| 770 | | -** Compute a reasonable limit on the length of the REGEXP NFA. |
| 771 | | -*/ |
| 772 | | -int re_maxlen(void){ |
| 773 | | - return g.db ? db_get_int("regexp-limit", 1000) : 1000; |
| 774 | | -} |
| 775 | | - |
| 776 | 836 | /* |
| 777 | 837 | ** Compile an RE, using the "regexp-limit" setting (default 1000) as the complexity limit. |
| 778 | 838 | */ |
| 779 | 839 | const char *fossil_re_compile( |
| 780 | 840 | ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
| 781 | 841 | const char *zIn, /* Input regular expression */ |
| 782 | 842 | int noCase /* True for caseless comparisons */ |
| 783 | 843 | ){ |
| 784 | | - return re_compile(ppRe, zIn, re_maxlen(), noCase); |
| 785 | | -} |
| 786 | | - |
| 787 | | -/* |
| 788 | | -** Implementation of the regexp() SQL function. This function implements |
| 789 | | -** the build-in REGEXP operator. The first argument to the function is the |
| 790 | | -** pattern and the second argument is the string. So, the SQL statements: |
| 791 | | -** |
| 792 | | -** A REGEXP B |
| 793 | | -** |
| 794 | | -** is implemented as regexp(B,A). |
| 795 | | -*/ |
| 796 | | -static void re_sql_func( |
| 797 | | - sqlite3_context *context, |
| 798 | | - int argc, |
| 799 | | - sqlite3_value **argv |
| 800 | | -){ |
| 801 | | - ReCompiled *pRe; /* Compiled regular expression */ |
| 802 | | - const char *zPattern; /* The regular expression */ |
| 803 | | - const unsigned char *zStr;/* String being searched */ |
| 804 | | - const char *zErr; /* Compile error message */ |
| 805 | | - int setAux = 0; /* True to invoke sqlite3_set_auxdata() */ |
| 806 | | - |
| 807 | | - (void)argc; /* Unused */ |
| 808 | | - pRe = sqlite3_get_auxdata(context, 0); |
| 809 | | - if( pRe==0 ){ |
| 810 | | - zPattern = (const char*)sqlite3_value_text(argv[0]); |
| 811 | | - if( zPattern==0 ) return; |
| 812 | | - zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); |
| 813 | | - if( zErr ){ |
| 814 | | - re_free(pRe); |
| 815 | | - sqlite3_result_int(context, 0); |
| 816 | | - /* sqlite3_result_error(context, zErr, -1); */ |
| 817 | | - return; |
| 818 | | - } |
| 819 | | - if( pRe==0 ){ |
| 820 | | - sqlite3_result_error_nomem(context); |
| 821 | | - return; |
| 822 | | - } |
| 823 | | - setAux = 1; |
| 824 | | - } |
| 825 | | - zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
| 826 | | - if( zStr!=0 ){ |
| 827 | | - sqlite3_result_int(context, re_match(pRe, zStr, -1)); |
| 828 | | - } |
| 829 | | - if( setAux ){ |
| 830 | | - sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
| 831 | | - } |
| 832 | | -} |
| 833 | | - |
| 834 | | -/* |
| 835 | | -** Invoke this routine to register the regexp() function with the |
| 836 | | -** SQLite database connection. |
| 837 | | -*/ |
| 838 | | -int re_add_sql_func(sqlite3 *db){ |
| 839 | | - int rc; |
| 840 | | - rc = sqlite3_create_function(db, "regexp", 2, |
| 841 | | - SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 842 | | - 0, re_sql_func, 0, 0); |
| 843 | | - if( rc==SQLITE_OK ){ |
| 844 | | - /* The regexpi(PATTERN,STRING) function is a case-insensitive version |
| 845 | | - ** of regexp(PATTERN,STRING). */ |
| 846 | | - rc = sqlite3_create_function(db, "regexpi", 2, |
| 847 | | - SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 848 | | - (void*)db, re_sql_func, 0, 0); |
| 849 | | - } |
| 850 | | - return rc; |
| 844 | + int mxLen = g.db ? db_get_int("regexp-limit",1000) : 1000; |
| 845 | + return re_compile(ppRe, zIn, mxLen, noCase); |
| 851 | 846 | } |
| 852 | 847 | |
| 853 | 848 | /* |
| 854 | 849 | ** Run a "grep" over a single file read from disk. |
| 855 | 850 | */ |
| 856 | 851 | |