Fossil SCM

More optimizations, taken over from trunk.

jan.nijtmans 2016-06-16 12:14 invalid_utf8_table merge
Commit ec7f6b2e71c5001416d37d2d5a88b63ab1b0da23
2 files changed +28 -43
+28 -43
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143143
** the derivatives CESU-8 & WTF-8 (as described in the same
144144
** wikipedia article referenced previously).
145145
*/
146146
147147
/* definitions for various UTF-8 sequence lengths */
148
-static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149
- 2, 0x80, 0x80
150
-};
151
-static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152
- 2, 0x80, 0xBF
153
-};
154
-static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155
- 3, 0xA0, 0xBF
156
-};
157
-static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158
- 3, 0x80, 0xBF
159
-};
160
-static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161
- 4, 0x90, 0xBF
162
-};
163
-static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164
- 4, 0x80, 0xBF
165
-};
166
-static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167
- 4, 0x80, 0x8F
168
-};
148
+#define US2A 2, 0x80, 0x80 /* for lead byte 0xC0 */
149
+#define US2B 2, 0x80, 0xBF /* for lead bytes 0xC2-0xDF */
150
+#define US3A 3, 0xA0, 0xBF /* for lead byte 0xE0 */
151
+#define US3B 3, 0x80, 0xBF /* for lead bytes 0xE1-0xEF */
152
+#define US4A 4, 0x90, 0xBF /* for lead byte 0xF0 */
153
+#define US4B 4, 0x80, 0xBF /* for lead bytes 0xF1-0xF3 */
154
+#define US4C 4, 0x80, 0x8F /* for lead byte 0xF4 */
155
+#define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
169156
170157
/* a table used for quick lookup of the definition that goes with a
171158
* particular lead byte */
172
-static const unsigned char* const lb_tab[] = {
173
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
178
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
179
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
180
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
181
- us2a, NULL, us2b, us2b, us2b, us2b, us2b, us2b,
182
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
183
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
184
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
185
- us3a, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
186
- us3b, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
187
- us4a, us4b, us4b, us4b, us4c, NULL, NULL, NULL,
188
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
159
+static const unsigned char lb_tab[] = {
160
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168
+ US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172
+ US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
173
+ US3B, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
174
+ US4A, US4B, US4B, US4B, US4C, US0A, US0A, US0A,
175
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A
189176
};
190177
191178
int invalid_utf8(
192179
const Blob *pContent
193180
){
@@ -201,23 +188,21 @@
201188
if( *z<0x80 ){
202189
++z;
203190
--n;
204191
}else{
205192
/* get the definition for this lead byte */
206
- const unsigned char* def = lb_tab[(*z++)-0x80];
193
+ const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
207194
unsigned char len;
208195
209
- /* if the definition doesn't exist, return invalid */
210
- if( !def ) return LOOK_INVALID;
211196
/* get the expected sequence length */
212
- len = *def++;
197
+ len = *def;
213198
/* if there aren't enough bytes left, return invalid */
214199
if( n<len ) {
215200
return LOOK_INVALID;
216201
}
217202
/* we already know byte #0 is good, so check the remaining bytes */
218
- if( (*z<*def++) || (*z++>*def++) ){
203
+ if( (*z<*++def) || (*z++>*++def) ){
219204
/* if the byte is outside the allowed range for this definition,
220205
* return invalid */
221206
return LOOK_INVALID;
222207
}
223208
if( len > 2 ){
224209
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149 2, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 3, 0xA0, 0xBF
156 };
157 static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 3, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 4, 0x90, 0xBF
162 };
163 static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 4, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 4, 0x80, 0x8F
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* const lb_tab[] = {
173 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
178 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
179 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
180 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
181 us2a, NULL, us2b, us2b, us2b, us2b, us2b, us2b,
182 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
183 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
184 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
185 us3a, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
186 us3b, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
187 us4a, us4b, us4b, us4b, us4c, NULL, NULL, NULL,
188 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
189 };
190
191 int invalid_utf8(
192 const Blob *pContent
193 ){
@@ -201,23 +188,21 @@
201 if( *z<0x80 ){
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 const unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def++;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) {
215 return LOOK_INVALID;
216 }
217 /* we already know byte #0 is good, so check the remaining bytes */
218 if( (*z<*def++) || (*z++>*def++) ){
219 /* if the byte is outside the allowed range for this definition,
220 * return invalid */
221 return LOOK_INVALID;
222 }
223 if( len > 2 ){
224
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 #define US2A 2, 0x80, 0x80 /* for lead byte 0xC0 */
149 #define US2B 2, 0x80, 0xBF /* for lead bytes 0xC2-0xDF */
150 #define US3A 3, 0xA0, 0xBF /* for lead byte 0xE0 */
151 #define US3B 3, 0x80, 0xBF /* for lead bytes 0xE1-0xEF */
152 #define US4A 4, 0x90, 0xBF /* for lead byte 0xF0 */
153 #define US4B 4, 0x80, 0xBF /* for lead bytes 0xF1-0xF3 */
154 #define US4C 4, 0x80, 0x8F /* for lead byte 0xF4 */
155 #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
 
 
 
 
 
 
 
 
 
 
 
 
 
156
157 /* a table used for quick lookup of the definition that goes with a
158 * particular lead byte */
159 static const unsigned char lb_tab[] = {
160 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
173 US3B, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
174 US4A, US4B, US4B, US4B, US4C, US0A, US0A, US0A,
175 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A
176 };
177
178 int invalid_utf8(
179 const Blob *pContent
180 ){
@@ -201,23 +188,21 @@
188 if( *z<0x80 ){
189 ++z;
190 --n;
191 }else{
192 /* get the definition for this lead byte */
193 const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
194 unsigned char len;
195
 
 
196 /* get the expected sequence length */
197 len = *def;
198 /* if there aren't enough bytes left, return invalid */
199 if( n<len ) {
200 return LOOK_INVALID;
201 }
202 /* we already know byte #0 is good, so check the remaining bytes */
203 if( (*z<*++def) || (*z++>*++def) ){
204 /* if the byte is outside the allowed range for this definition,
205 * return invalid */
206 return LOOK_INVALID;
207 }
208 if( len > 2 ){
209
+28 -43
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143143
** the derivatives CESU-8 & WTF-8 (as described in the same
144144
** wikipedia article referenced previously).
145145
*/
146146
147147
/* definitions for various UTF-8 sequence lengths */
148
-static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149
- 2, 0x80, 0x80
150
-};
151
-static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152
- 2, 0x80, 0xBF
153
-};
154
-static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155
- 3, 0xA0, 0xBF
156
-};
157
-static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158
- 3, 0x80, 0xBF
159
-};
160
-static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161
- 4, 0x90, 0xBF
162
-};
163
-static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164
- 4, 0x80, 0xBF
165
-};
166
-static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167
- 4, 0x80, 0x8F
168
-};
148
+#define US2A 2, 0x80, 0x80 /* for lead byte 0xC0 */
149
+#define US2B 2, 0x80, 0xBF /* for lead bytes 0xC2-0xDF */
150
+#define US3A 3, 0xA0, 0xBF /* for lead byte 0xE0 */
151
+#define US3B 3, 0x80, 0xBF /* for lead bytes 0xE1-0xEF */
152
+#define US4A 4, 0x90, 0xBF /* for lead byte 0xF0 */
153
+#define US4B 4, 0x80, 0xBF /* for lead bytes 0xF1-0xF3 */
154
+#define US4C 4, 0x80, 0x8F /* for lead byte 0xF4 */
155
+#define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
169156
170157
/* a table used for quick lookup of the definition that goes with a
171158
* particular lead byte */
172
-static const unsigned char* const lb_tab[] = {
173
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
178
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
179
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
180
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
181
- us2a, NULL, us2b, us2b, us2b, us2b, us2b, us2b,
182
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
183
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
184
- us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
185
- us3a, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
186
- us3b, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
187
- us4a, us4b, us4b, us4b, us4c, NULL, NULL, NULL,
188
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
159
+static const unsigned char lb_tab[] = {
160
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168
+ US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171
+ US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172
+ US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
173
+ US3B, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
174
+ US4A, US4B, US4B, US4B, US4C, US0A, US0A, US0A,
175
+ US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A
189176
};
190177
191178
int invalid_utf8(
192179
const Blob *pContent
193180
){
@@ -201,23 +188,21 @@
201188
if( *z<0x80 ){
202189
++z;
203190
--n;
204191
}else{
205192
/* get the definition for this lead byte */
206
- const unsigned char* def = lb_tab[(*z++)-0x80];
193
+ const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
207194
unsigned char len;
208195
209
- /* if the definition doesn't exist, return invalid */
210
- if( !def ) return LOOK_INVALID;
211196
/* get the expected sequence length */
212
- len = *def++;
197
+ len = *def;
213198
/* if there aren't enough bytes left, return invalid */
214199
if( n<len ) {
215200
return LOOK_INVALID;
216201
}
217202
/* we already know byte #0 is good, so check the remaining bytes */
218
- if( (*z<*def++) || (*z++>*def++) ){
203
+ if( (*z<*++def) || (*z++>*++def) ){
219204
/* if the byte is outside the allowed range for this definition,
220205
* return invalid */
221206
return LOOK_INVALID;
222207
}
223208
if( len > 2 ){
224209
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149 2, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 3, 0xA0, 0xBF
156 };
157 static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 3, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 4, 0x90, 0xBF
162 };
163 static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 4, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 4, 0x80, 0x8F
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* const lb_tab[] = {
173 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
178 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
179 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
180 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
181 us2a, NULL, us2b, us2b, us2b, us2b, us2b, us2b,
182 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
183 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
184 us2b, us2b, us2b, us2b, us2b, us2b, us2b, us2b,
185 us3a, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
186 us3b, us3b, us3b, us3b, us3b, us3b, us3b, us3b,
187 us4a, us4b, us4b, us4b, us4c, NULL, NULL, NULL,
188 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
189 };
190
191 int invalid_utf8(
192 const Blob *pContent
193 ){
@@ -201,23 +188,21 @@
201 if( *z<0x80 ){
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 const unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def++;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) {
215 return LOOK_INVALID;
216 }
217 /* we already know byte #0 is good, so check the remaining bytes */
218 if( (*z<*def++) || (*z++>*def++) ){
219 /* if the byte is outside the allowed range for this definition,
220 * return invalid */
221 return LOOK_INVALID;
222 }
223 if( len > 2 ){
224
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,51 +143,38 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 #define US2A 2, 0x80, 0x80 /* for lead byte 0xC0 */
149 #define US2B 2, 0x80, 0xBF /* for lead bytes 0xC2-0xDF */
150 #define US3A 3, 0xA0, 0xBF /* for lead byte 0xE0 */
151 #define US3B 3, 0x80, 0xBF /* for lead bytes 0xE1-0xEF */
152 #define US4A 4, 0x90, 0xBF /* for lead byte 0xF0 */
153 #define US4B 4, 0x80, 0xBF /* for lead bytes 0xF1-0xF3 */
154 #define US4C 4, 0x80, 0x8F /* for lead byte 0xF4 */
155 #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */
 
 
 
 
 
 
 
 
 
 
 
 
 
156
157 /* a table used for quick lookup of the definition that goes with a
158 * particular lead byte */
159 static const unsigned char lb_tab[] = {
160 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
161 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
162 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
163 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
164 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
165 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
166 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
167 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A,
168 US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B,
169 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
170 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
171 US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B,
172 US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
173 US3B, US3B, US3B, US3B, US3B, US3B, US3B, US3B,
174 US4A, US4B, US4B, US4B, US4C, US0A, US0A, US0A,
175 US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A
176 };
177
178 int invalid_utf8(
179 const Blob *pContent
180 ){
@@ -201,23 +188,21 @@
188 if( *z<0x80 ){
189 ++z;
190 --n;
191 }else{
192 /* get the definition for this lead byte */
193 const unsigned char* def = &lb_tab[(3 * *z++)-0x180];
194 unsigned char len;
195
 
 
196 /* get the expected sequence length */
197 len = *def;
198 /* if there aren't enough bytes left, return invalid */
199 if( n<len ) {
200 return LOOK_INVALID;
201 }
202 /* we already know byte #0 is good, so check the remaining bytes */
203 if( (*z<*++def) || (*z++>*++def) ){
204 /* if the byte is outside the allowed range for this definition,
205 * return invalid */
206 return LOOK_INVALID;
207 }
208 if( len > 2 ){
209

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button