Fossil SCM

Simplifications and constification

jan.nijtmans 2016-06-15 08:19 invalid_utf8_table
Commit 6051c441edda95837da361f9bb764befb93954b0
1 file changed +10 -12
+10 -12
--- src/lookslike.c
+++ src/lookslike.c
@@ -144,34 +144,34 @@
144 144 ** wikipedia article referenced previously).
145 145 */
146 146
147 147 /* definitions for various UTF-8 sequence lengths */
148 148 static const unsigned char us2a[] = {
149     - 2, 0xC0, 0xC0, 0x80, 0x80
    149 + 2, 0x80, 0x80
150 150 };
151 151 static const unsigned char us2b[] = {
152     - 2, 0xC2, 0xDF, 0x80, 0xBF
    152 + 2, 0x80, 0xBF
153 153 };
154 154 static const unsigned char us3a[] = {
155     - 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF
    155 + 3, 0xA0, 0xBF, 0x80, 0xBF
156 156 };
157 157 static const unsigned char us3b[] = {
158     - 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF
    158 + 3, 0x80, 0xBF, 0x80, 0xBF
159 159 };
160 160 static const unsigned char us4a[] = {
161     - 4, 0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
    161 + 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 162 };
163 163 static const unsigned char us4b[] = {
164     - 4, 0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
    164 + 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 165 };
166 166 static const unsigned char us4c[] = {
167     - 4, 0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
    167 + 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 168 };
169 169
170 170 /* a table used for quick lookup of the definition that goes with a
171 171 * particular lead byte */
172     -static const unsigned char* lb_tab[] = {
    172 +static const unsigned char* const lb_tab[] = {
173 173 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174 174 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175 175 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176 176 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177 177 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -201,21 +201,19 @@
201 201 if( *z<0x80 ){
202 202 ++z;
203 203 --n;
204 204 }else{
205 205 /* get the definition for this lead byte */
206     - unsigned char* def = lb_tab[(*z++)-0x80];
    206 + const unsigned char* def = lb_tab[(*z++)-0x80];
207 207 unsigned char i, len;
208 208
209 209 /* if the definition doesn't exist, return invalid */
210 210 if( !def ) return LOOK_INVALID;
211 211 /* get the expected sequence length */
212     - len = *def;
    212 + len = *def++;
213 213 /* if there aren't enough bytes left, return invalid */
214 214 if( n<len ) return LOOK_INVALID;
215     - /* skip the length & lead byte range */
216     - def += 3;
217 215 /* we already know byte #0 is good, so check the remaining bytes */
218 216 for(i=1; i<len; ++i){
219 217 /* if the byte is outside the allowed range for this definition,
220 218 * return invalid */
221 219 if( (*z<*def++) || (*z++>*def++) ){
222 220
--- src/lookslike.c
+++ src/lookslike.c
@@ -144,34 +144,34 @@
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = {
149 2, 0xC0, 0xC0, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = {
152 2, 0xC2, 0xDF, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = {
155 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF
156 };
157 static const unsigned char us3b[] = {
158 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = {
161 4, 0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 };
163 static const unsigned char us4b[] = {
164 4, 0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = {
167 4, 0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* lb_tab[] = {
173 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -201,21 +201,19 @@
201 if( *z<0x80 ){
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char i, len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) return LOOK_INVALID;
215 /* skip the length & lead byte range */
216 def += 3;
217 /* we already know byte #0 is good, so check the remaining bytes */
218 for(i=1; i<len; ++i){
219 /* if the byte is outside the allowed range for this definition,
220 * return invalid */
221 if( (*z<*def++) || (*z++>*def++) ){
222
--- src/lookslike.c
+++ src/lookslike.c
@@ -144,34 +144,34 @@
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = {
149 2, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = {
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = {
155 3, 0xA0, 0xBF, 0x80, 0xBF
156 };
157 static const unsigned char us3b[] = {
158 3, 0x80, 0xBF, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = {
161 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 };
163 static const unsigned char us4b[] = {
164 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = {
167 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172 static const unsigned char* const lb_tab[] = {
173 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -201,21 +201,19 @@
201 if( *z<0x80 ){
202 ++z;
203 --n;
204 }else{
205 /* get the definition for this lead byte */
206 const unsigned char* def = lb_tab[(*z++)-0x80];
207 unsigned char i, len;
208
209 /* if the definition doesn't exist, return invalid */
210 if( !def ) return LOOK_INVALID;
211 /* get the expected sequence length */
212 len = *def++;
213 /* if there aren't enough bytes left, return invalid */
214 if( n<len ) return LOOK_INVALID;
 
 
215 /* we already know byte #0 is good, so check the remaining bytes */
216 for(i=1; i<len; ++i){
217 /* if the byte is outside the allowed range for this definition,
218 * return invalid */
219 if( (*z<*def++) || (*z++>*def++) ){
220

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button