Fossil SCM

Reformatted invalid_utf8 to make it conform a bit better to the existing style.

sdr 2016-06-11 00:41 invalid_utf8_table
Commit dd3bb22cd726f26e785776d50f605a469a03870d
1 file changed +9 -19
+9 -19
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,12 +143,11 @@
143143
** Java and Tcl use it. This function also considers valid
144144
** the derivatives CESU-8 & WTF-8 (as described in the same
145145
** wikipedia article referenced previously).
146146
*/
147147
148
-int invalid_utf8(const Blob *pContent)
149
-{
148
+int invalid_utf8(const Blob *pContent) {
150149
/* definitions for various utf-8 sequence lengths */
151150
static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 };
152151
static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF };
153152
static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF };
154153
static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF };
@@ -164,18 +163,16 @@
164163
165164
/* a pointer to the table; NULL means not yet setup */
166165
static unsigned char** lb_ptr = NULL;
167166
168167
/* if the table pointer hasn't been initialized */
169
- if (lb_ptr == NULL)
170
- {
168
+ if (lb_ptr == NULL) {
171169
lb_ptr = lb_tab;
172170
173171
/* for each definition, set the lead byte table pointer to the proper definition */
174172
unsigned char** pp = def_arr;
175
- while (*pp != NULL)
176
- {
173
+ while (*pp != NULL) {
177174
unsigned char lo = pp[0][1];
178175
unsigned char hi = pp[0][2];
179176
unsigned char i;
180177
for (i = lo; i <= hi; ++i)
181178
lb_ptr[i] = pp[0];
@@ -186,45 +183,38 @@
186183
/* buffer pointer and size */
187184
const unsigned char *z = (unsigned char *)blob_buffer(pContent);
188185
unsigned int n = blob_size(pContent);
189186
190187
/* while we haven't checked all the bytes in the buffer */
191
- while (n > 0)
192
- {
188
+ while (n > 0) {
189
+
193190
/* ascii is trivial */
194
- if (*z < 0x80)
195
- {
191
+ if (*z < 0x80) {
196192
++z;
197193
--n;
198
- }
199
- else
200
- {
194
+ } else {
201195
/* get the definition for this lead byte */
202196
unsigned char* def = lb_ptr[*z++];
203197
unsigned char i, len;
204198
205199
/* if the definition doesn't exist, return invalid */
206
- if (!def)
207
- return LOOK_INVALID;
200
+ if (!def) return LOOK_INVALID;
208201
209202
/* get the expected sequence length */
210203
len = *def;
211204
212205
/* if there aren't enough bytes left, return invalid */
213
- if (n < len)
214
- return LOOK_INVALID;
206
+ if (n < len) return LOOK_INVALID;
215207
216208
/* skip the length & lead byte range */
217209
def += 3;
218210
219211
/* we already know byte #0 is good, so check the remaining bytes */
220212
for (i = 1; i < len; ++i)
221
- {
222213
/* if the byte is outside the allowed range for this definition, return invalid */
223214
if ((*z < *def++) || (*z++ > *def++))
224215
return LOOK_INVALID;
225
- }
226216
227217
/* advance to the next sequence */
228218
n -= len;
229219
}
230220
}
231221
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,12 +143,11 @@
143 ** Java and Tcl use it. This function also considers valid
144 ** the derivatives CESU-8 & WTF-8 (as described in the same
145 ** wikipedia article referenced previously).
146 */
147
148 int invalid_utf8(const Blob *pContent)
149 {
150 /* definitions for various utf-8 sequence lengths */
151 static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 };
152 static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF };
153 static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF };
154 static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF };
@@ -164,18 +163,16 @@
164
165 /* a pointer to the table; NULL means not yet setup */
166 static unsigned char** lb_ptr = NULL;
167
168 /* if the table pointer hasn't been initialized */
169 if (lb_ptr == NULL)
170 {
171 lb_ptr = lb_tab;
172
173 /* for each definition, set the lead byte table pointer to the proper definition */
174 unsigned char** pp = def_arr;
175 while (*pp != NULL)
176 {
177 unsigned char lo = pp[0][1];
178 unsigned char hi = pp[0][2];
179 unsigned char i;
180 for (i = lo; i <= hi; ++i)
181 lb_ptr[i] = pp[0];
@@ -186,45 +183,38 @@
186 /* buffer pointer and size */
187 const unsigned char *z = (unsigned char *)blob_buffer(pContent);
188 unsigned int n = blob_size(pContent);
189
190 /* while we haven't checked all the bytes in the buffer */
191 while (n > 0)
192 {
193 /* ascii is trivial */
194 if (*z < 0x80)
195 {
196 ++z;
197 --n;
198 }
199 else
200 {
201 /* get the definition for this lead byte */
202 unsigned char* def = lb_ptr[*z++];
203 unsigned char i, len;
204
205 /* if the definition doesn't exist, return invalid */
206 if (!def)
207 return LOOK_INVALID;
208
209 /* get the expected sequence length */
210 len = *def;
211
212 /* if there aren't enough bytes left, return invalid */
213 if (n < len)
214 return LOOK_INVALID;
215
216 /* skip the length & lead byte range */
217 def += 3;
218
219 /* we already know byte #0 is good, so check the remaining bytes */
220 for (i = 1; i < len; ++i)
221 {
222 /* if the byte is outside the allowed range for this definition, return invalid */
223 if ((*z < *def++) || (*z++ > *def++))
224 return LOOK_INVALID;
225 }
226
227 /* advance to the next sequence */
228 n -= len;
229 }
230 }
231
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,12 +143,11 @@
143 ** Java and Tcl use it. This function also considers valid
144 ** the derivatives CESU-8 & WTF-8 (as described in the same
145 ** wikipedia article referenced previously).
146 */
147
148 int invalid_utf8(const Blob *pContent) {
 
149 /* definitions for various utf-8 sequence lengths */
150 static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 };
151 static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF };
152 static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF };
153 static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF };
@@ -164,18 +163,16 @@
163
164 /* a pointer to the table; NULL means not yet setup */
165 static unsigned char** lb_ptr = NULL;
166
167 /* if the table pointer hasn't been initialized */
168 if (lb_ptr == NULL) {
 
169 lb_ptr = lb_tab;
170
171 /* for each definition, set the lead byte table pointer to the proper definition */
172 unsigned char** pp = def_arr;
173 while (*pp != NULL) {
 
174 unsigned char lo = pp[0][1];
175 unsigned char hi = pp[0][2];
176 unsigned char i;
177 for (i = lo; i <= hi; ++i)
178 lb_ptr[i] = pp[0];
@@ -186,45 +183,38 @@
183 /* buffer pointer and size */
184 const unsigned char *z = (unsigned char *)blob_buffer(pContent);
185 unsigned int n = blob_size(pContent);
186
187 /* while we haven't checked all the bytes in the buffer */
188 while (n > 0) {
189
190 /* ascii is trivial */
191 if (*z < 0x80) {
 
192 ++z;
193 --n;
194 } else {
 
 
195 /* get the definition for this lead byte */
196 unsigned char* def = lb_ptr[*z++];
197 unsigned char i, len;
198
199 /* if the definition doesn't exist, return invalid */
200 if (!def) return LOOK_INVALID;
 
201
202 /* get the expected sequence length */
203 len = *def;
204
205 /* if there aren't enough bytes left, return invalid */
206 if (n < len) return LOOK_INVALID;
 
207
208 /* skip the length & lead byte range */
209 def += 3;
210
211 /* we already know byte #0 is good, so check the remaining bytes */
212 for (i = 1; i < len; ++i)
 
213 /* if the byte is outside the allowed range for this definition, return invalid */
214 if ((*z < *def++) || (*z++ > *def++))
215 return LOOK_INVALID;
 
216
217 /* advance to the next sequence */
218 n -= len;
219 }
220 }
221

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button