Fossil SCM
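Fix the regexp() SQL function implementation (problem pointed out by [https://fossil-scm.org/forum/forumpost/c9eb457131|Florian Balmer]) so that it properly rejects all overlong 3-byte UTF-8 sequences, converting them into U+FFFD (0xfffd). The previous check only rejected decoded values up to 0x3ff, but every value up to 0x7ff fits in a 2-byte sequence, so a 3-byte encoding of any such value is overlong and must be rejected as well.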
Commit
6b32e213056cea75554b25427f3c28d22fd1325dadcd1024ccf2b8ae7080973e
Parent
8628cbd428cb507…
1 file changed
+1
-1
+1
-1
| --- src/regexp.c | ||
| +++ src/regexp.c | ||
| @@ -107,11 +107,11 @@ | ||
| 107 | 107 | if( c<0x80 ) c = 0xfffd; |
| 108 | 108 | }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 109 | 109 | && (p->z[p->i+1]&0xc0)==0x80 ){ |
| 110 | 110 | c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); |
| 111 | 111 | p->i += 2; |
| 112 | - if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; | |
| 112 | + if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; | |
| 113 | 113 | }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 114 | 114 | && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ |
| 115 | 115 | c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) |
| 116 | 116 | | (p->z[p->i+2]&0x3f); |
| 117 | 117 | p->i += 3; |
| 118 | 118 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -107,11 +107,11 @@ | |
| 107 | if( c<0x80 ) c = 0xfffd; |
| 108 | }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 109 | && (p->z[p->i+1]&0xc0)==0x80 ){ |
| 110 | c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); |
| 111 | p->i += 2; |
| 112 | if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; |
| 113 | }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 114 | && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ |
| 115 | c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) |
| 116 | | (p->z[p->i+2]&0x3f); |
| 117 | p->i += 3; |
| 118 |
| --- src/regexp.c | |
| +++ src/regexp.c | |
| @@ -107,11 +107,11 @@ | |
| 107 | if( c<0x80 ) c = 0xfffd; |
| 108 | }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 109 | && (p->z[p->i+1]&0xc0)==0x80 ){ |
| 110 | c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); |
| 111 | p->i += 2; |
| 112 | if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; |
| 113 | }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 |
| 114 | && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ |
| 115 | c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) |
| 116 | | (p->z[p->i+2]&0x3f); |
| 117 | p->i += 3; |
| 118 |