Fossil SCM

Shrunk the lead-byte table used by invalid_utf8() by dropping the entries for 0x80 through 0xBF, and added a shortcut that rejects any lead byte in that range (inclusive) directly, without a table lookup.

sdr 2016-06-16 22:14 trunk
Commit 69328517f5726df77c55f842ad934a992f849036
1 file changed +4 -10
+4 -10
--- src/lookslike.c
+++ src/lookslike.c
@@ -160,18 +160,10 @@
160160
#define US0A 0xFF, 0x00 /* for any other lead byte */
161161
162162
/* a table used for quick lookup of the definition that goes with a
163163
* particular lead byte */
164164
static const unsigned char lb_tab[] = {
165
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
169
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
170
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
171
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
172
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
173165
US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
174166
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
175167
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
176168
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
177169
US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -190,20 +182,22 @@
190182
if( n==0 ) return 0; /* Empty file -> OK */
191183
c = *z;
192184
while( --n>0 ){
193185
c2 = c;
194186
c = *++z;
195
- if( c2>=0x80 ){
196
- const unsigned char *def = &lb_tab[(2*c2)-0x100];
187
+ if( c2>=0xC0 ){
188
+ const unsigned char *def = &lb_tab[(2*c2)-0x180];
197189
if( (c<*def) || (c>*++def) ){
198190
return LOOK_INVALID; /* Invalid UTF-8 */
199191
}
200192
if( c2>=0xe0 ){
201193
c = (c2<<1)|3;
202194
}else{
203195
c = ' ';
204196
}
197
+ }else if( c2>=0x80 ){
198
+ return LOOK_INVALID;
205199
}
206200
}
207201
return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
208202
}
209203
210204
--- src/lookslike.c
+++ src/lookslike.c
@@ -160,18 +160,10 @@
160 #define US0A 0xFF, 0x00 /* for any other lead byte */
161
162 /* a table used for quick lookup of the definition that goes with a
163 * particular lead byte */
164 static const unsigned char lb_tab[] = {
165 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
169 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
170 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
171 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
172 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
173 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
174 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
175 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
176 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
177 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -190,20 +182,22 @@
190 if( n==0 ) return 0; /* Empty file -> OK */
191 c = *z;
192 while( --n>0 ){
193 c2 = c;
194 c = *++z;
195 if( c2>=0x80 ){
196 const unsigned char *def = &lb_tab[(2*c2)-0x100];
197 if( (c<*def) || (c>*++def) ){
198 return LOOK_INVALID; /* Invalid UTF-8 */
199 }
200 if( c2>=0xe0 ){
201 c = (c2<<1)|3;
202 }else{
203 c = ' ';
204 }
 
 
205 }
206 }
207 return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
208 }
209
210
--- src/lookslike.c
+++ src/lookslike.c
@@ -160,18 +160,10 @@
160 #define US0A 0xFF, 0x00 /* for any other lead byte */
161
162 /* a table used for quick lookup of the definition that goes with a
163 * particular lead byte */
164 static const unsigned char lb_tab[] = {
 
 
 
 
 
 
 
 
165 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
166 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
167 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
168 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
169 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -190,20 +182,22 @@
182 if( n==0 ) return 0; /* Empty file -> OK */
183 c = *z;
184 while( --n>0 ){
185 c2 = c;
186 c = *++z;
187 if( c2>=0xC0 ){
188 const unsigned char *def = &lb_tab[(2*c2)-0x180];
189 if( (c<*def) || (c>*++def) ){
190 return LOOK_INVALID; /* Invalid UTF-8 */
191 }
192 if( c2>=0xe0 ){
193 c = (c2<<1)|3;
194 }else{
195 c = ' ';
196 }
197 }else if( c2>=0x80 ){
198 return LOOK_INVALID;
199 }
200 }
201 return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
202 }
203
204

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button