Fossil SCM

Unroll loop for even greater speed

jan.nijtmans 2016-06-16 07:46 invalid_utf8_table
Commit cc09e00358fd274a2d1167104e4539a35c852477
1 file changed +19 -9
+19 -9
--- src/lookslike.c
+++ src/lookslike.c
@@ -150,23 +150,23 @@
150 150
};
151 151
static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 152
2, 0x80, 0xBF
153 153
};
154 154
static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155
- 3, 0xA0, 0xBF, 0x80, 0xBF
155
+ 3, 0xA0, 0xBF
156 156
};
157 157
static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158
- 3, 0x80, 0xBF, 0x80, 0xBF
158
+ 3, 0x80, 0xBF
159 159
};
160 160
static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161
- 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
161
+ 4, 0x90, 0xBF
162 162
};
163 163
static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164
- 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
164
+ 4, 0x80, 0xBF
165 165
};
166 166
static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167
- 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
167
+ 4, 0x80, 0x8F
168 168
};
169 169
170 170
/* a table used for quick lookup of the definition that goes with a
171 171
* particular lead byte */
172 172
static const unsigned char* const lb_tab[] = {
@@ -202,24 +202,34 @@
202 202
++z;
203 203
--n;
204 204
}else{
205 205
/* get the definition for this lead byte */
206 206
const unsigned char* def = lb_tab[(*z++)-0x80];
207
- unsigned char i, len;
207
+ unsigned char len;
208 208
209 209
/* if the definition doesn't exist, return invalid */
210 210
if( !def ) return LOOK_INVALID;
211 211
/* get the expected sequence length */
212 212
len = *def++;
213 213
/* if there aren't enough bytes left, return invalid */
214
- if( n<len ) return LOOK_INVALID;
214
+ if( n<len ) {
215
+ return LOOK_INVALID;
216
+ }
215 217
/* we already know byte #0 is good, so check the remaining bytes */
216
- for(i=1; i<len; ++i){
218
+ if( (*z<*def++) || (*z++>*def++) ){
217 219
/* if the byte is outside the allowed range for this definition,
218 220
* return invalid */
219
- if( (*z<*def++) || (*z++>*def++) ){
221
+ return LOOK_INVALID;
222
+ }
223
+ if( len > 2 ){
224
+ if( (*z<0x80) || (*z++>0xBF) ){
220 225
return LOOK_INVALID;
226
+ }
227
+ if( len > 3 ){
228
+ if( (*z<0x80) || (*z++>0xBF) ){
229
+ return LOOK_INVALID;
230
+ }
221 231
}
222 232
}
223 233
/* advance to the next sequence */
224 234
n -= len;
225 235
}
226 236
--- src/lookslike.c
+++ src/lookslike.c
@@ -150,23 +150,23 @@
150 };
151 static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 3, 0xA0, 0xBF, 0x80, 0xBF
156 };
157 static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 3, 0x80, 0xBF, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 };
163 static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* const lb_tab[] = {
@@ -202,24 +202,34 @@
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 const unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char i, len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def++;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) return LOOK_INVALID;
 
 
215 /* we already know byte #0 is good, so check the remaining bytes */
216 for(i=1; i<len; ++i){
217 /* if the byte is outside the allowed range for this definition,
218 * return invalid */
219 if( (*z<*def++) || (*z++>*def++) ){
 
 
 
220 return LOOK_INVALID;
 
 
 
 
 
221 }
222 }
223 /* advance to the next sequence */
224 n -= len;
225 }
226
--- src/lookslike.c
+++ src/lookslike.c
@@ -150,23 +150,23 @@
150 };
151 static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 3, 0xA0, 0xBF
156 };
157 static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 3, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 4, 0x90, 0xBF
162 };
163 static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 4, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 4, 0x80, 0x8F
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* const lb_tab[] = {
@@ -202,24 +202,34 @@
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 const unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def++;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) {
215 return LOOK_INVALID;
216 }
217 /* we already know byte #0 is good, so check the remaining bytes */
218 if( (*z<*def++) || (*z++>*def++) ){
219 /* if the byte is outside the allowed range for this definition,
220 * return invalid */
221 return LOOK_INVALID;
222 }
223 if( len > 2 ){
224 if( (*z<0x80) || (*z++>0xBF) ){
225 return LOOK_INVALID;
226 }
227 if( len > 3 ){
228 if( (*z<0x80) || (*z++>0xBF) ){
229 return LOOK_INVALID;
230 }
231 }
232 }
233 /* advance to the next sequence */
234 n -= len;
235 }
236

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button