Fossil SCM
Fix off-by-one error in regexp \u escape sequence parsing. The regexp \x escape sequence should accept exactly 2 hex digits.
Commit
e4ca677a6c9d066bf4a6f51df4c7ed8e7131ce36
Parent
a18a49c875a0e7c…
1 file changed
+9
-9
+9
-9
| --- src/regexp.c | ||
| +++ src/regexp.c | ||
| @@ -32,11 +32,11 @@ | ||
| 32 | 32 | ** X$ X occurring at the end of the string |
| 33 | 33 | ** . Match any single character |
| 34 | 34 | ** \c Character c where c is one of \{}()[]|*+?. |
| 35 | 35 | ** \c C-language escapes for c in afnrtv. ex: \t or \n |
| 36 | 36 | ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX |
| 37 | -** \xXXX Where XXX is any number of hex digits, unicode value XXX | |
| 37 | +** \xXX Where XX is exactly 2 hex digits, unicode value XX | |
| 38 | 38 | ** [abc] Any single character from the set abc |
| 39 | 39 | ** [^abc] Any single character not in the set abc |
| 40 | 40 | ** [a-z] Any single character in the range a-z |
| 41 | 41 | ** [^a-z] Any single character not in the range a-z |
| 42 | 42 | ** \b Word boundary |
| @@ -381,22 +381,21 @@ | ||
| 381 | 381 | *pV = (*pV)*16 + (c & 0xff); |
| 382 | 382 | return 1; |
| 383 | 383 | } |
| 384 | 384 | |
| 385 | 385 | /* A backslash character has been seen, read the next character and |
| 386 | -** return its intepretation. | |
| 386 | +** return its interpretation. | |
| 387 | 387 | */ |
| 388 | 388 | static unsigned re_esc_char(ReCompiled *p){ |
| 389 | 389 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 390 | 390 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 391 | 391 | int i, v = 0; |
| 392 | 392 | char c; |
| 393 | 393 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 394 | 394 | c = p->sIn.z[p->sIn.i]; |
| 395 | - if( c=='u' && p->sIn.i+5<p->sIn.mx ){ | |
| 395 | + if( c=='u' && p->sIn.i+4<p->sIn.mx ){ | |
| 396 | 396 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 397 | - v = 0; | |
| 398 | 397 | if( re_hex(zIn[1],&v) |
| 399 | 398 | && re_hex(zIn[2],&v) |
| 400 | 399 | && re_hex(zIn[3],&v) |
| 401 | 400 | && re_hex(zIn[4],&v) |
| 402 | 401 | ){ |
| @@ -403,15 +402,16 @@ | ||
| 403 | 402 | p->sIn.i += 5; |
| 404 | 403 | return v; |
| 405 | 404 | } |
| 406 | 405 | } |
| 407 | 406 | if( c=='x' ){ |
| 408 | - v = 0; | |
| 409 | - for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){} | |
| 410 | - if( i>1 ){ | |
| 411 | - p->sIn.i += i; | |
| 412 | - return v; | |
| 407 | + const unsigned char *zIn = p->sIn.z + p->sIn.i; | |
| 408 | + if( p->sIn.i+2<p->sIn.mx ){ | |
| 409 | + if( re_hex(zIn[1],&v) && re_hex(zIn[2],&v) ){ | |
| 410 | + p->sIn.i += 3; | |
| 411 | + return v; | |
| 412 | + } | |
| 413 | 413 | } |
| 414 | 414 | } |
| 415 | 415 | for(i=0; zEsc[i] && zEsc[i]!=c; i++){} |
| 416 | 416 | if( zEsc[i] ){ |
| 417 | 417 | if( i<6 ) c = zTrans[i]; |
| 418 | 418 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -32,11 +32,11 @@ | |
| 32 | ** X$ X occurring at the end of the string |
| 33 | ** . Match any single character |
| 34 | ** \c Character c where c is one of \{}()[]|*+?. |
| 35 | ** \c C-language escapes for c in afnrtv. ex: \t or \n |
| 36 | ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX |
| 37 | ** \xXXX Where XXX is any number of hex digits, unicode value XXX |
| 38 | ** [abc] Any single character from the set abc |
| 39 | ** [^abc] Any single character not in the set abc |
| 40 | ** [a-z] Any single character in the range a-z |
| 41 | ** [^a-z] Any single character not in the range a-z |
| 42 | ** \b Word boundary |
| @@ -381,22 +381,21 @@ | |
| 381 | *pV = (*pV)*16 + (c & 0xff); |
| 382 | return 1; |
| 383 | } |
| 384 | |
| 385 | /* A backslash character has been seen, read the next character and |
| 386 | ** return its intepretation. |
| 387 | */ |
| 388 | static unsigned re_esc_char(ReCompiled *p){ |
| 389 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 390 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 391 | int i, v = 0; |
| 392 | char c; |
| 393 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 394 | c = p->sIn.z[p->sIn.i]; |
| 395 | if( c=='u' && p->sIn.i+5<p->sIn.mx ){ |
| 396 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 397 | v = 0; |
| 398 | if( re_hex(zIn[1],&v) |
| 399 | && re_hex(zIn[2],&v) |
| 400 | && re_hex(zIn[3],&v) |
| 401 | && re_hex(zIn[4],&v) |
| 402 | ){ |
| @@ -403,15 +402,16 @@ | |
| 403 | p->sIn.i += 5; |
| 404 | return v; |
| 405 | } |
| 406 | } |
| 407 | if( c=='x' ){ |
| 408 | v = 0; |
| 409 | for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){} |
| 410 | if( i>1 ){ |
| 411 | p->sIn.i += i; |
| 412 | return v; |
| 413 | } |
| 414 | } |
| 415 | for(i=0; zEsc[i] && zEsc[i]!=c; i++){} |
| 416 | if( zEsc[i] ){ |
| 417 | if( i<6 ) c = zTrans[i]; |
| 418 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -32,11 +32,11 @@ | |
| 32 | ** X$ X occurring at the end of the string |
| 33 | ** . Match any single character |
| 34 | ** \c Character c where c is one of \{}()[]|*+?. |
| 35 | ** \c C-language escapes for c in afnrtv. ex: \t or \n |
| 36 | ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX |
| 37 | ** \xXX Where XX is exactly 2 hex digits, unicode value XX |
| 38 | ** [abc] Any single character from the set abc |
| 39 | ** [^abc] Any single character not in the set abc |
| 40 | ** [a-z] Any single character in the range a-z |
| 41 | ** [^a-z] Any single character not in the range a-z |
| 42 | ** \b Word boundary |
| @@ -381,22 +381,21 @@ | |
| 381 | *pV = (*pV)*16 + (c & 0xff); |
| 382 | return 1; |
| 383 | } |
| 384 | |
| 385 | /* A backslash character has been seen, read the next character and |
| 386 | ** return its interpretation. |
| 387 | */ |
| 388 | static unsigned re_esc_char(ReCompiled *p){ |
| 389 | static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
| 390 | static const char zTrans[] = "\a\f\n\r\t\v"; |
| 391 | int i, v = 0; |
| 392 | char c; |
| 393 | if( p->sIn.i>=p->sIn.mx ) return 0; |
| 394 | c = p->sIn.z[p->sIn.i]; |
| 395 | if( c=='u' && p->sIn.i+4<p->sIn.mx ){ |
| 396 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 397 | if( re_hex(zIn[1],&v) |
| 398 | && re_hex(zIn[2],&v) |
| 399 | && re_hex(zIn[3],&v) |
| 400 | && re_hex(zIn[4],&v) |
| 401 | ){ |
| @@ -403,15 +402,16 @@ | |
| 402 | p->sIn.i += 5; |
| 403 | return v; |
| 404 | } |
| 405 | } |
| 406 | if( c=='x' ){ |
| 407 | const unsigned char *zIn = p->sIn.z + p->sIn.i; |
| 408 | if( p->sIn.i+2<p->sIn.mx ){ |
| 409 | if( re_hex(zIn[1],&v) && re_hex(zIn[2],&v) ){ |
| 410 | p->sIn.i += 3; |
| 411 | return v; |
| 412 | } |
| 413 | } |
| 414 | } |
| 415 | for(i=0; zEsc[i] && zEsc[i]!=c; i++){} |
| 416 | if( zEsc[i] ){ |
| 417 | if( i<6 ) c = zTrans[i]; |
| 418 |