Fossil SCM
Reformatted invalid_utf8 to make it conform a bit better to the existing style.
Commit
dd3bb22cd726f26e785776d50f605a469a03870d
Parent
4f906e5357b33e6…
1 file changed
+9
-19
+9
-19
| --- src/lookslike.c | ||
| +++ src/lookslike.c | ||
| @@ -143,12 +143,11 @@ | ||
| 143 | 143 | ** Java and Tcl use it. This function also considers valid |
| 144 | 144 | ** the derivatives CESU-8 & WTF-8 (as described in the same |
| 145 | 145 | ** wikipedia article referenced previously). |
| 146 | 146 | */ |
| 147 | 147 | |
| 148 | -int invalid_utf8(const Blob *pContent) | |
| 149 | -{ | |
| 148 | +int invalid_utf8(const Blob *pContent) { | |
| 150 | 149 | /* definitions for various utf-8 sequence lengths */ |
| 151 | 150 | static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 }; |
| 152 | 151 | static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF }; |
| 153 | 152 | static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF }; |
| 154 | 153 | static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF }; |
| @@ -164,18 +163,16 @@ | ||
| 164 | 163 | |
| 165 | 164 | /* a pointer to the table; NULL means not yet setup */ |
| 166 | 165 | static unsigned char** lb_ptr = NULL; |
| 167 | 166 | |
| 168 | 167 | /* if the table pointer hasn't been initialized */ |
| 169 | - if (lb_ptr == NULL) | |
| 170 | - { | |
| 168 | + if (lb_ptr == NULL) { | |
| 171 | 169 | lb_ptr = lb_tab; |
| 172 | 170 | |
| 173 | 171 | /* for each definition, set the lead byte table pointer to the proper definition */ |
| 174 | 172 | unsigned char** pp = def_arr; |
| 175 | - while (*pp != NULL) | |
| 176 | - { | |
| 173 | + while (*pp != NULL) { | |
| 177 | 174 | unsigned char lo = pp[0][1]; |
| 178 | 175 | unsigned char hi = pp[0][2]; |
| 179 | 176 | unsigned char i; |
| 180 | 177 | for (i = lo; i <= hi; ++i) |
| 181 | 178 | lb_ptr[i] = pp[0]; |
| @@ -186,45 +183,38 @@ | ||
| 186 | 183 | /* buffer pointer and size */ |
| 187 | 184 | const unsigned char *z = (unsigned char *)blob_buffer(pContent); |
| 188 | 185 | unsigned int n = blob_size(pContent); |
| 189 | 186 | |
| 190 | 187 | /* while we haven't checked all the bytes in the buffer */ |
| 191 | - while (n > 0) | |
| 192 | - { | |
| 188 | + while (n > 0) { | |
| 189 | + | |
| 193 | 190 | /* ascii is trivial */ |
| 194 | - if (*z < 0x80) | |
| 195 | - { | |
| 191 | + if (*z < 0x80) { | |
| 196 | 192 | ++z; |
| 197 | 193 | --n; |
| 198 | - } | |
| 199 | - else | |
| 200 | - { | |
| 194 | + } else { | |
| 201 | 195 | /* get the definition for this lead byte */ |
| 202 | 196 | unsigned char* def = lb_ptr[*z++]; |
| 203 | 197 | unsigned char i, len; |
| 204 | 198 | |
| 205 | 199 | /* if the definition doesn't exist, return invalid */ |
| 206 | - if (!def) | |
| 207 | - return LOOK_INVALID; | |
| 200 | + if (!def) return LOOK_INVALID; | |
| 208 | 201 | |
| 209 | 202 | /* get the expected sequence length */ |
| 210 | 203 | len = *def; |
| 211 | 204 | |
| 212 | 205 | /* if there aren't enough bytes left, return invalid */ |
| 213 | - if (n < len) | |
| 214 | - return LOOK_INVALID; | |
| 206 | + if (n < len) return LOOK_INVALID; | |
| 215 | 207 | |
| 216 | 208 | /* skip the length & lead byte range */ |
| 217 | 209 | def += 3; |
| 218 | 210 | |
| 219 | 211 | /* we already know byte #0 is good, so check the remaining bytes */ |
| 220 | 212 | for (i = 1; i < len; ++i) |
| 221 | - { | |
| 222 | 213 | /* if the byte is outside the allowed range for this definition, return invalid */ |
| 223 | 214 | if ((*z < *def++) || (*z++ > *def++)) |
| 224 | 215 | return LOOK_INVALID; |
| 225 | - } | |
| 226 | 216 | |
| 227 | 217 | /* advance to the next sequence */ |
| 228 | 218 | n -= len; |
| 229 | 219 | } |
| 230 | 220 | } |
| 231 | 221 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -143,12 +143,11 @@ | |
| 143 | ** Java and Tcl use it. This function also considers valid |
| 144 | ** the derivatives CESU-8 & WTF-8 (as described in the same |
| 145 | ** wikipedia article referenced previously). |
| 146 | */ |
| 147 | |
| 148 | int invalid_utf8(const Blob *pContent) |
| 149 | { |
| 150 | /* definitions for various utf-8 sequence lengths */ |
| 151 | static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 }; |
| 152 | static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF }; |
| 153 | static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF }; |
| 154 | static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF }; |
| @@ -164,18 +163,16 @@ | |
| 164 | |
| 165 | /* a pointer to the table; NULL means not yet setup */ |
| 166 | static unsigned char** lb_ptr = NULL; |
| 167 | |
| 168 | /* if the table pointer hasn't been initialized */ |
| 169 | if (lb_ptr == NULL) |
| 170 | { |
| 171 | lb_ptr = lb_tab; |
| 172 | |
| 173 | /* for each definition, set the lead byte table pointer to the proper definition */ |
| 174 | unsigned char** pp = def_arr; |
| 175 | while (*pp != NULL) |
| 176 | { |
| 177 | unsigned char lo = pp[0][1]; |
| 178 | unsigned char hi = pp[0][2]; |
| 179 | unsigned char i; |
| 180 | for (i = lo; i <= hi; ++i) |
| 181 | lb_ptr[i] = pp[0]; |
| @@ -186,45 +183,38 @@ | |
| 186 | /* buffer pointer and size */ |
| 187 | const unsigned char *z = (unsigned char *)blob_buffer(pContent); |
| 188 | unsigned int n = blob_size(pContent); |
| 189 | |
| 190 | /* while we haven't checked all the bytes in the buffer */ |
| 191 | while (n > 0) |
| 192 | { |
| 193 | /* ascii is trivial */ |
| 194 | if (*z < 0x80) |
| 195 | { |
| 196 | ++z; |
| 197 | --n; |
| 198 | } |
| 199 | else |
| 200 | { |
| 201 | /* get the definition for this lead byte */ |
| 202 | unsigned char* def = lb_ptr[*z++]; |
| 203 | unsigned char i, len; |
| 204 | |
| 205 | /* if the definition doesn't exist, return invalid */ |
| 206 | if (!def) |
| 207 | return LOOK_INVALID; |
| 208 | |
| 209 | /* get the expected sequence length */ |
| 210 | len = *def; |
| 211 | |
| 212 | /* if there aren't enough bytes left, return invalid */ |
| 213 | if (n < len) |
| 214 | return LOOK_INVALID; |
| 215 | |
| 216 | /* skip the length & lead byte range */ |
| 217 | def += 3; |
| 218 | |
| 219 | /* we already know byte #0 is good, so check the remaining bytes */ |
| 220 | for (i = 1; i < len; ++i) |
| 221 | { |
| 222 | /* if the byte is outside the allowed range for this definition, return invalid */ |
| 223 | if ((*z < *def++) || (*z++ > *def++)) |
| 224 | return LOOK_INVALID; |
| 225 | } |
| 226 | |
| 227 | /* advance to the next sequence */ |
| 228 | n -= len; |
| 229 | } |
| 230 | } |
| 231 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -143,12 +143,11 @@ | |
| 143 | ** Java and Tcl use it. This function also considers valid |
| 144 | ** the derivatives CESU-8 & WTF-8 (as described in the same |
| 145 | ** wikipedia article referenced previously). |
| 146 | */ |
| 147 | |
| 148 | int invalid_utf8(const Blob *pContent) { |
| 149 | /* definitions for various utf-8 sequence lengths */ |
| 150 | static unsigned char def_2a[] = { 2, 0xC0, 0xC0, 0x80, 0x80 }; |
| 151 | static unsigned char def_2b[] = { 2, 0xC2, 0xDF, 0x80, 0xBF }; |
| 152 | static unsigned char def_3a[] = { 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF }; |
| 153 | static unsigned char def_3b[] = { 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF }; |
| @@ -164,18 +163,16 @@ | |
| 163 | |
| 164 | /* a pointer to the table; NULL means not yet setup */ |
| 165 | static unsigned char** lb_ptr = NULL; |
| 166 | |
| 167 | /* if the table pointer hasn't been initialized */ |
| 168 | if (lb_ptr == NULL) { |
| 169 | lb_ptr = lb_tab; |
| 170 | |
| 171 | /* for each definition, set the lead byte table pointer to the proper definition */ |
| 172 | unsigned char** pp = def_arr; |
| 173 | while (*pp != NULL) { |
| 174 | unsigned char lo = pp[0][1]; |
| 175 | unsigned char hi = pp[0][2]; |
| 176 | unsigned char i; |
| 177 | for (i = lo; i <= hi; ++i) |
| 178 | lb_ptr[i] = pp[0]; |
| @@ -186,45 +183,38 @@ | |
| 183 | /* buffer pointer and size */ |
| 184 | const unsigned char *z = (unsigned char *)blob_buffer(pContent); |
| 185 | unsigned int n = blob_size(pContent); |
| 186 | |
| 187 | /* while we haven't checked all the bytes in the buffer */ |
| 188 | while (n > 0) { |
| 189 | |
| 190 | /* ascii is trivial */ |
| 191 | if (*z < 0x80) { |
| 192 | ++z; |
| 193 | --n; |
| 194 | } else { |
| 195 | /* get the definition for this lead byte */ |
| 196 | unsigned char* def = lb_ptr[*z++]; |
| 197 | unsigned char i, len; |
| 198 | |
| 199 | /* if the definition doesn't exist, return invalid */ |
| 200 | if (!def) return LOOK_INVALID; |
| 201 | |
| 202 | /* get the expected sequence length */ |
| 203 | len = *def; |
| 204 | |
| 205 | /* if there aren't enough bytes left, return invalid */ |
| 206 | if (n < len) return LOOK_INVALID; |
| 207 | |
| 208 | /* skip the length & lead byte range */ |
| 209 | def += 3; |
| 210 | |
| 211 | /* we already know byte #0 is good, so check the remaining bytes */ |
| 212 | for (i = 1; i < len; ++i) |
| 213 | /* if the byte is outside the allowed range for this definition, return invalid */ |
| 214 | if ((*z < *def++) || (*z++ > *def++)) |
| 215 | return LOOK_INVALID; |
| 216 | |
| 217 | /* advance to the next sequence */ |
| 218 | n -= len; |
| 219 | } |
| 220 | } |
| 221 |