Fossil SCM

More optimizations: every byte from 0x80 through 0xBF is invalid as a lead byte, so reject those with a simple range check; this also lets the invalid_utf8 lead-byte table shrink further.

sdr 2016-06-16 17:01 invalid_utf8_table
Commit 6eb9a30c085d755122b7f875ade5636fdb7300b2
1 file changed +3 -9
+3 -9
--- src/lookslike.c
+++ src/lookslike.c
@@ -155,18 +155,10 @@
155155
#define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
156156
157157
/* a table used for quick lookup of the definition that goes with a
158158
* particular lead byte */
159159
static const unsigned char lb_tab[] = {
160
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167
- US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168160
US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169161
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170162
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171163
US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172164
US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -186,13 +178,15 @@
186178
while( n>0 ){
187179
/* ascii is trivial */
188180
if( *z<0x80 ){
189181
++z;
190182
--n;
183
+ }else if( *z<0xC0 ){
184
+ return LOOK_INVALID;
191185
}else{
192186
/* get the definition for this lead byte */
193
- const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
187
+ const unsigned char* def = &lb_tab[(3 * *z++)-0x240];
194188
unsigned char len;
195189
196190
/* get the expected sequence length */
197191
len = *def;
198192
/* if there aren't enough bytes left, return invalid */
199193
--- src/lookslike.c
+++ src/lookslike.c
@@ -155,18 +155,10 @@
155 #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
156
157 /* a table used for quick lookup of the definition that goes with a
158 * particular lead byte */
159 static const unsigned char lb_tab[] = {
160 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -186,13 +178,15 @@
186 while( n>0 ){
187 /* ascii is trivial */
188 if( *z<0x80 ){
189 ++z;
190 --n;
 
 
191 }else{
192 /* get the definition for this lead byte */
193 const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
194 unsigned char len;
195
196 /* get the expected sequence length */
197 len = *def;
198 /* if there aren't enough bytes left, return invalid */
199
--- src/lookslike.c
+++ src/lookslike.c
@@ -155,18 +155,10 @@
155 #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
156
157 /* a table used for quick lookup of the definition that goes with a
158 * particular lead byte */
159 static const unsigned char lb_tab[] = {
 
 
 
 
 
 
 
 
160 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
161 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
162 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
163 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
164 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
@@ -186,13 +178,15 @@
178 while( n>0 ){
179 /* ascii is trivial */
180 if( *z<0x80 ){
181 ++z;
182 --n;
183 }else if( *z<0xC0 ){
184 return LOOK_INVALID;
185 }else{
186 /* get the definition for this lead byte */
187 const unsigned char* def = &lb_tab[(3 * *z++)-0x240];
188 unsigned char len;
189
190 /* get the expected sequence length */
191 len = *def;
192 /* if there aren't enough bytes left, return invalid */
193

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button