Fossil SCM

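Fix the regexp() SQL function implementation (problem pointed out by [https://fossil-scm.org/forum/forumpost/c9eb457131|Florian Balmer]) so that it properly rejects all overlong 3-byte UTF-8 sequences, converting them into 0xfffd. Previously only 3-byte sequences decoding to 0x3ff or less were rejected; the check now covers everything up to 0x7ff, since any such value can be encoded in one or two bytes.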

drh 2019-12-19 20:56 trunk
Commit 6b32e213056cea75554b25427f3c28d22fd1325dadcd1024ccf2b8ae7080973e
1 file changed +1 -1
+1 -1
--- src/regexp.c
+++ src/regexp.c
@@ -107,11 +107,11 @@
107 107   if( c<0x80 ) c = 0xfffd;
108 108   }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
109 109   && (p->z[p->i+1]&0xc0)==0x80 ){
110 110   c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
111 111   p->i += 2;
112     - if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
    112 + if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
113 113   }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
114 114   && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
115 115   c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
116 116   | (p->z[p->i+2]&0x3f);
117 117   p->i += 3;
118 118
--- src/regexp.c
+++ src/regexp.c
@@ -107,11 +107,11 @@
107 if( c<0x80 ) c = 0xfffd;
108 }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
109 && (p->z[p->i+1]&0xc0)==0x80 ){
110 c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
111 p->i += 2;
112 if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
113 }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
114 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
115 c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
116 | (p->z[p->i+2]&0x3f);
117 p->i += 3;
118
--- src/regexp.c
+++ src/regexp.c
@@ -107,11 +107,11 @@
107 if( c<0x80 ) c = 0xfffd;
108 }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
109 && (p->z[p->i+1]&0xc0)==0x80 ){
110 c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
111 p->i += 2;
112 if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
113 }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
114 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
115 c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
116 | (p->z[p->i+2]&0x3f);
117 p->i += 3;
118

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button