| | @@ -24,11 +24,11 @@ |
| 24 | 24 | ** The following regular expression syntax is supported: |
| 25 | 25 | ** |
| 26 | 26 | ** X* zero or more occurrences of X |
| 27 | 27 | ** X+ one or more occurrences of X |
| 28 | 28 | ** X? zero or one occurrences of X |
| 29 | | -** X{p,q} between p and q occurrences of X, 0 <= p,q <= 999 |
| 29 | +** X{p,q} between p and q occurrences of X |
| 30 | 30 | ** (X) match X |
| 31 | 31 | ** X|Y X or Y |
| 32 | 32 | ** ^X X occurring at the beginning of the string |
| 33 | 33 | ** X$ X occurring at the end of the string |
| 34 | 34 | ** . Match any single character |
| | @@ -53,13 +53,10 @@ |
| 53 | 53 | ** expression and M is the size of the input string. The matcher never |
| 54 | 54 | ** exhibits exponential behavior. Note that the X{p,q} operator expands |
| 55 | 55 | ** to p copies of X following by q-p copies of X? and that the size of the |
| 56 | 56 | ** regular expression in the O(N*M) performance bound is computed after |
| 57 | 57 | ** this expansion. |
| 58 | | -** |
| 59 | | -** To help prevent DoS attacks, the values of p and q in the "{p,q}" syntax |
| 60 | | -** are limited to SQLITE_MAX_REGEXP_REPEAT, default 999. |
| 61 | 58 | */ |
| 62 | 59 | #include "config.h" |
| 63 | 60 | #include "regexp.h" |
| 64 | 61 | |
| 65 | 62 | #ifndef SQLITE_MAX_REGEXP_REPEAT |
| | @@ -125,10 +122,11 @@ |
| 125 | 122 | unsigned (*xNextChar)(ReInput*); /* Next character function */ |
| 126 | 123 | unsigned char zInit[12]; /* Initial text to match */ |
| 127 | 124 | int nInit; /* Number of characters in zInit */ |
| 128 | 125 | unsigned nState; /* Number of entries in aOp[] and aArg[] */ |
| 129 | 126 | unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ |
| 127 | + unsigned mxAlloc; /* Complexity limit */ |
| 130 | 128 | }; |
| 131 | 129 | #endif |
| 132 | 130 | |
| 133 | 131 | /* Add a state to the given state set if it is not already there */ |
| 134 | 132 | static void re_add_state(ReStateSet *pSet, int newState){ |
| | @@ -341,15 +339,16 @@ |
| 341 | 339 | /* Resize the opcode and argument arrays for an RE under construction. |
| 342 | 340 | */ |
| 343 | 341 | static int re_resize(ReCompiled *p, int N){ |
| 344 | 342 | char *aOp; |
| 345 | 343 | int *aArg; |
| 344 | + if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; } |
| 346 | 345 | aOp = fossil_realloc(p->aOp, N*sizeof(p->aOp[0])); |
| 347 | | - if( aOp==0 ) return 1; |
| 346 | + if( aOp==0 ){ p->zErr = "out of memory"; return 1; } |
| 348 | 347 | p->aOp = aOp; |
| 349 | 348 | aArg = fossil_realloc(p->aArg, N*sizeof(p->aArg[0])); |
| 350 | | - if( aArg==0 ) return 1; |
| 349 | + if( aArg==0 ){ p->zErr = "out of memory"; return 1; } |
| 351 | 350 | p->aArg = aArg; |
| 352 | 351 | p->nAlloc = N; |
| 353 | 352 | return 0; |
| 354 | 353 | } |
| 355 | 354 | |
| | @@ -534,20 +533,20 @@ |
| 534 | 533 | unsigned int m = 0, n = 0; |
| 535 | 534 | unsigned int sz, j; |
| 536 | 535 | if( iPrev<0 ) return "'{m,n}' without operand"; |
| 537 | 536 | while( (c=rePeek(p))>='0' && c<='9' ){ |
| 538 | 537 | m = m*10 + c - '0'; |
| 539 | | - if( m>SQLITE_MAX_REGEXP_REPEAT ) return "integer too large"; |
| 538 | + if( m*2>p->mxAlloc ) return "REGEXP pattern too big"; |
| 540 | 539 | p->sIn.i++; |
| 541 | 540 | } |
| 542 | 541 | n = m; |
| 543 | 542 | if( c==',' ){ |
| 544 | 543 | p->sIn.i++; |
| 545 | 544 | n = 0; |
| 546 | 545 | while( (c=rePeek(p))>='0' && c<='9' ){ |
| 547 | 546 | n = n*10 + c-'0'; |
| 548 | | - if( n>SQLITE_MAX_REGEXP_REPEAT ) return "integer too large"; |
| 547 | + if( n*2>p->mxAlloc ) return "REGEXP pattern too big"; |
| 549 | 548 | p->sIn.i++; |
| 550 | 549 | } |
| 551 | 550 | } |
| 552 | 551 | if( c!='}' ) return "unmatched '{'"; |
| 553 | 552 | if( n>0 && n<m ) return "n less than m in '{m,n}'"; |
| | @@ -564,11 +563,11 @@ |
| 564 | 563 | for(j=m; j<n; j++){ |
| 565 | 564 | re_append(p, RE_OP_FORK, sz+1); |
| 566 | 565 | re_copy(p, iPrev, sz); |
| 567 | 566 | } |
| 568 | 567 | if( n==0 && m>0 ){ |
| 569 | | - re_append(p, RE_OP_FORK, -sz); |
| 568 | + re_append(p, RE_OP_FORK, -(int)sz); |
| 570 | 569 | } |
| 571 | 570 | break; |
| 572 | 571 | } |
| 573 | 572 | case '[': { |
| 574 | 573 | unsigned int iFirst = p->nState; |
| | @@ -644,11 +643,16 @@ |
| 644 | 643 | ** Compile a textual regular expression in zIn[] into a compiled regular |
| 645 | 644 | ** expression suitable for us by re_match() and return a pointer to the |
| 646 | 645 | ** compiled regular expression in *ppRe. Return NULL on success or an |
| 647 | 646 | ** error message if something goes wrong. |
| 648 | 647 | */ |
| 649 | | -const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ |
| 648 | +const char *re_compile( |
| 649 | + ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
| 650 | + const char *zIn, /* Input regular expression */ |
| 651 | + int mxRe, /* Complexity limit */ |
| 652 | + int noCase /* True for caseless comparisons */ |
| 653 | +){ |
| 650 | 654 | ReCompiled *pRe; |
| 651 | 655 | const char *zErr; |
| 652 | 656 | int i, j; |
| 653 | 657 | |
| 654 | 658 | *ppRe = 0; |
| | @@ -656,13 +660,15 @@ |
| 656 | 660 | if( pRe==0 ){ |
| 657 | 661 | return "out of memory"; |
| 658 | 662 | } |
| 659 | 663 | memset(pRe, 0, sizeof(*pRe)); |
| 660 | 664 | pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; |
| 665 | + pRe->mxAlloc = mxRe; |
| 661 | 666 | if( re_resize(pRe, 30) ){ |
| 667 | + zErr = pRe->zErr; |
| 662 | 668 | re_free(pRe); |
| 663 | | - return "out of memory"; |
| 669 | + return zErr; |
| 664 | 670 | } |
| 665 | 671 | if( zIn[0]=='^' ){ |
| 666 | 672 | zIn++; |
| 667 | 673 | }else{ |
| 668 | 674 | re_append(pRe, RE_OP_ANYSTAR, 0); |
| | @@ -749,10 +755,36 @@ |
| 749 | 755 | zIn++; |
| 750 | 756 | } |
| 751 | 757 | blob_materialize(&out); |
| 752 | 758 | return out.aData; |
| 753 | 759 | } |
| 760 | + |
| 761 | +/* |
| 762 | +** SETTING: regexp-limit width=8 default=1000 |
| 763 | +** |
| 764 | +** Limit the size of the bytecode used to implement a regular expression |
| 765 | +** to this many steps. It is important to limit this to avoid possible |
| 766 | +** DoS attacks. |
| 767 | +*/ |
| 768 | + |
| 769 | +/* |
| 770 | +** Compute a reasonable limit on the length of the REGEXP NFA. |
| 771 | +*/ |
| 772 | +int re_maxlen(void){ |
| 773 | + return g.db ? db_get_int("regexp-limit", 1000) : 1000; |
| 774 | +} |
| 775 | + |
| 776 | +/* |
| 777 | +** Compile an RE using re_maxlen(). |
| 778 | +*/ |
| 779 | +const char *fossil_re_compile( |
| 780 | + ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
| 781 | + const char *zIn, /* Input regular expression */ |
| 782 | + int noCase /* True for caseless comparisons */ |
| 783 | +){ |
| 784 | + return re_compile(ppRe, zIn, re_maxlen(), noCase); |
| 785 | +} |
| 754 | 786 | |
| 755 | 787 | /* |
| 756 | 788 | ** Implementation of the regexp() SQL function. This function implements |
| 757 | 789 | ** the build-in REGEXP operator. The first argument to the function is the |
| 758 | 790 | ** pattern and the second argument is the string. So, the SQL statements: |
| | @@ -775,11 +807,11 @@ |
| 775 | 807 | (void)argc; /* Unused */ |
| 776 | 808 | pRe = sqlite3_get_auxdata(context, 0); |
| 777 | 809 | if( pRe==0 ){ |
| 778 | 810 | zPattern = (const char*)sqlite3_value_text(argv[0]); |
| 779 | 811 | if( zPattern==0 ) return; |
| 780 | | - zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); |
| 812 | + zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); |
| 781 | 813 | if( zErr ){ |
| 782 | 814 | re_free(pRe); |
| 783 | 815 | sqlite3_result_int(context, 0); |
| 784 | 816 | /* sqlite3_result_error(context, zErr, -1); */ |
| 785 | 817 | return; |
| | @@ -803,16 +835,18 @@ |
| 803 | 835 | ** Invoke this routine to register the regexp() function with the |
| 804 | 836 | ** SQLite database connection. |
| 805 | 837 | */ |
| 806 | 838 | int re_add_sql_func(sqlite3 *db){ |
| 807 | 839 | int rc; |
| 808 | | - rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8|SQLITE_INNOCUOUS, |
| 840 | + rc = sqlite3_create_function(db, "regexp", 2, |
| 841 | + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 809 | 842 | 0, re_sql_func, 0, 0); |
| 810 | 843 | if( rc==SQLITE_OK ){ |
| 811 | 844 | /* The regexpi(PATTERN,STRING) function is a case-insensitive version |
| 812 | 845 | ** of regexp(PATTERN,STRING). */ |
| 813 | | - rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8|SQLITE_INNOCUOUS, |
| 846 | + rc = sqlite3_create_function(db, "regexpi", 2, |
| 847 | + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
| 814 | 848 | (void*)db, re_sql_func, 0, 0); |
| 815 | 849 | } |
| 816 | 850 | return rc; |
| 817 | 851 | } |
| 818 | 852 | |
| | @@ -891,18 +925,18 @@ |
| 891 | 925 | if( bRobot ){ |
| 892 | 926 | const char *zRe; |
| 893 | 927 | db_find_and_open_repository(0,0); |
| 894 | 928 | verify_all_options(); |
| 895 | 929 | zRe = db_get("robot-exception","^$"); |
| 896 | | - zErr = re_compile(&pRe, zRe, ignoreCase); |
| 930 | + zErr = fossil_re_compile(&pRe, zRe, ignoreCase); |
| 897 | 931 | iFileList = 2; |
| 898 | 932 | }else{ |
| 899 | 933 | verify_all_options(); |
| 900 | 934 | if( g.argc<3 ){ |
| 901 | 935 | usage("REGEXP [FILE...]"); |
| 902 | 936 | } |
| 903 | | - zErr = re_compile(&pRe, g.argv[2], ignoreCase); |
| 937 | + zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); |
| 904 | 938 | } |
| 905 | 939 | if( zErr ) fossil_fatal("%s", zErr); |
| 906 | 940 | if( g.argc==iFileList ){ |
| 907 | 941 | grep_file(pRe, "-", stdin); |
| 908 | 942 | }else{ |
| | @@ -980,11 +1014,11 @@ |
| 980 | 1014 | db_find_and_open_repository(0, 0); |
| 981 | 1015 | verify_all_options(); |
| 982 | 1016 | if( g.argc<4 ){ |
| 983 | 1017 | usage("REGEXP FILENAME ..."); |
| 984 | 1018 | } |
| 985 | | - zErr = re_compile(&pRe, g.argv[2], ignoreCase); |
| 1019 | + zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); |
| 986 | 1020 | if( zErr ) fossil_fatal("%s", zErr); |
| 987 | 1021 | |
| 988 | 1022 | add_content_sql_commands(g.db); |
| 989 | 1023 | db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);"); |
| 990 | 1024 | for(ii=3; ii<g.argc; ii++){ |
| 991 | 1025 | |