Fossil SCM

Fix an off-by-one error in the bounds check for regexp \u escape sequence parsing. The regexp \x escape sequence now accepts exactly 2 hex digits.

drh 2013-01-11 18:12 trunk merge
Commit e4ca677a6c9d066bf4a6f51df4c7ed8e7131ce36
1 file changed +9 -9
+9 -9
--- src/regexp.c
+++ src/regexp.c
@@ -32,11 +32,11 @@
3232
** X$ X occurring at the end of the string
3333
** . Match any single character
3434
** \c Character c where c is one of \{}()[]|*+?.
3535
** \c C-language escapes for c in afnrtv. ex: \t or \n
3636
** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
37
-** \xXXX Where XXX is any number of hex digits, unicode value XXX
37
+** \xXX Where XX is exactly 2 hex digits, unicode value XX
3838
** [abc] Any single character from the set abc
3939
** [^abc] Any single character not in the set abc
4040
** [a-z] Any single character in the range a-z
4141
** [^a-z] Any single character not in the range a-z
4242
** \b Word boundary
@@ -381,22 +381,21 @@
381381
*pV = (*pV)*16 + (c & 0xff);
382382
return 1;
383383
}
384384
385385
/* A backslash character has been seen, read the next character and
386
-** return its intepretation.
386
+** return its interpretation.
387387
*/
388388
static unsigned re_esc_char(ReCompiled *p){
389389
static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
390390
static const char zTrans[] = "\a\f\n\r\t\v";
391391
int i, v = 0;
392392
char c;
393393
if( p->sIn.i>=p->sIn.mx ) return 0;
394394
c = p->sIn.z[p->sIn.i];
395
- if( c=='u' && p->sIn.i+5<p->sIn.mx ){
395
+ if( c=='u' && p->sIn.i+4<p->sIn.mx ){
396396
const unsigned char *zIn = p->sIn.z + p->sIn.i;
397
- v = 0;
398397
if( re_hex(zIn[1],&v)
399398
&& re_hex(zIn[2],&v)
400399
&& re_hex(zIn[3],&v)
401400
&& re_hex(zIn[4],&v)
402401
){
@@ -403,15 +402,16 @@
403402
p->sIn.i += 5;
404403
return v;
405404
}
406405
}
407406
if( c=='x' ){
408
- v = 0;
409
- for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
410
- if( i>1 ){
411
- p->sIn.i += i;
412
- return v;
407
+ const unsigned char *zIn = p->sIn.z + p->sIn.i;
408
+ if( p->sIn.i+2<p->sIn.mx ){
409
+ if( re_hex(zIn[1],&v) && re_hex(zIn[2],&v) ){
410
+ p->sIn.i += 3;
411
+ return v;
412
+ }
413413
}
414414
}
415415
for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
416416
if( zEsc[i] ){
417417
if( i<6 ) c = zTrans[i];
418418
--- src/regexp.c
+++ src/regexp.c
@@ -32,11 +32,11 @@
32 ** X$ X occurring at the end of the string
33 ** . Match any single character
34 ** \c Character c where c is one of \{}()[]|*+?.
35 ** \c C-language escapes for c in afnrtv. ex: \t or \n
36 ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
37 ** \xXXX Where XXX is any number of hex digits, unicode value XXX
38 ** [abc] Any single character from the set abc
39 ** [^abc] Any single character not in the set abc
40 ** [a-z] Any single character in the range a-z
41 ** [^a-z] Any single character not in the range a-z
42 ** \b Word boundary
@@ -381,22 +381,21 @@
381 *pV = (*pV)*16 + (c & 0xff);
382 return 1;
383 }
384
385 /* A backslash character has been seen, read the next character and
386 ** return its intepretation.
387 */
388 static unsigned re_esc_char(ReCompiled *p){
389 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
390 static const char zTrans[] = "\a\f\n\r\t\v";
391 int i, v = 0;
392 char c;
393 if( p->sIn.i>=p->sIn.mx ) return 0;
394 c = p->sIn.z[p->sIn.i];
395 if( c=='u' && p->sIn.i+5<p->sIn.mx ){
396 const unsigned char *zIn = p->sIn.z + p->sIn.i;
397 v = 0;
398 if( re_hex(zIn[1],&v)
399 && re_hex(zIn[2],&v)
400 && re_hex(zIn[3],&v)
401 && re_hex(zIn[4],&v)
402 ){
@@ -403,15 +402,16 @@
403 p->sIn.i += 5;
404 return v;
405 }
406 }
407 if( c=='x' ){
408 v = 0;
409 for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
410 if( i>1 ){
411 p->sIn.i += i;
412 return v;
 
413 }
414 }
415 for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
416 if( zEsc[i] ){
417 if( i<6 ) c = zTrans[i];
418
--- src/regexp.c
+++ src/regexp.c
@@ -32,11 +32,11 @@
32 ** X$ X occurring at the end of the string
33 ** . Match any single character
34 ** \c Character c where c is one of \{}()[]|*+?.
35 ** \c C-language escapes for c in afnrtv. ex: \t or \n
36 ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
37 ** \xXX Where XX is exactly 2 hex digits, unicode value XX
38 ** [abc] Any single character from the set abc
39 ** [^abc] Any single character not in the set abc
40 ** [a-z] Any single character in the range a-z
41 ** [^a-z] Any single character not in the range a-z
42 ** \b Word boundary
@@ -381,22 +381,21 @@
381 *pV = (*pV)*16 + (c & 0xff);
382 return 1;
383 }
384
385 /* A backslash character has been seen, read the next character and
386 ** return its interpretation.
387 */
388 static unsigned re_esc_char(ReCompiled *p){
389 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
390 static const char zTrans[] = "\a\f\n\r\t\v";
391 int i, v = 0;
392 char c;
393 if( p->sIn.i>=p->sIn.mx ) return 0;
394 c = p->sIn.z[p->sIn.i];
395 if( c=='u' && p->sIn.i+4<p->sIn.mx ){
396 const unsigned char *zIn = p->sIn.z + p->sIn.i;
 
397 if( re_hex(zIn[1],&v)
398 && re_hex(zIn[2],&v)
399 && re_hex(zIn[3],&v)
400 && re_hex(zIn[4],&v)
401 ){
@@ -403,15 +402,16 @@
402 p->sIn.i += 5;
403 return v;
404 }
405 }
406 if( c=='x' ){
407 const unsigned char *zIn = p->sIn.z + p->sIn.i;
408 if( p->sIn.i+2<p->sIn.mx ){
409 if( re_hex(zIn[1],&v) && re_hex(zIn[2],&v) ){
410 p->sIn.i += 3;
411 return v;
412 }
413 }
414 }
415 for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
416 if( zEsc[i] ){
417 if( i<6 ) c = zTrans[i];
418

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button